diff --git a/build/Dependencies.props b/build/Dependencies.props index 24f3153e4c..c1334615db 100644 --- a/build/Dependencies.props +++ b/build/Dependencies.props @@ -13,7 +13,7 @@ 0.11.1 1.10.0 1.5.0 - + 4.5.1 2.9.0 4.5.0 1.2.0 diff --git a/pkg/Microsoft.ML/Microsoft.ML.nupkgproj b/pkg/Microsoft.ML/Microsoft.ML.nupkgproj index 7757e264b6..7aed922027 100644 --- a/pkg/Microsoft.ML/Microsoft.ML.nupkgproj +++ b/pkg/Microsoft.ML/Microsoft.ML.nupkgproj @@ -12,6 +12,7 @@ + diff --git a/src/Microsoft.ML.Api/ApiUtils.cs b/src/Microsoft.ML.Api/ApiUtils.cs index 96e821f16e..760ed1e768 100644 --- a/src/Microsoft.ML.Api/ApiUtils.cs +++ b/src/Microsoft.ML.Api/ApiUtils.cs @@ -19,11 +19,10 @@ private static OpCode GetAssignmentOpCode(Type t) { // REVIEW: This should be a Dictionary based solution. // DvTypes, strings, arrays, all nullable types, VBuffers and UInt128. - if (t == typeof(DvInt8) || t == typeof(DvInt4) || t == typeof(DvInt2) || t == typeof(DvInt1) || - t == typeof(DvBool) || t == typeof(DvText) || t == typeof(string) || t.IsArray || + if (t == typeof(ReadOnlyMemory) || t == typeof(string) || t.IsArray || (t.IsGenericType && t.GetGenericTypeDefinition() == typeof(VBuffer<>)) || (t.IsGenericType && t.GetGenericTypeDefinition() == typeof(Nullable<>)) || - t == typeof(DvDateTime) || t == typeof(DvDateTimeZone) || t == typeof(DvTimeSpan) || t == typeof(UInt128)) + t == typeof(DateTime) || t == typeof(DateTimeOffset) || t == typeof(TimeSpan) || t == typeof(UInt128)) { return OpCodes.Stobj; } diff --git a/src/Microsoft.ML.Api/DataViewConstructionUtils.cs b/src/Microsoft.ML.Api/DataViewConstructionUtils.cs index 6962080a7e..ca7ed9c07e 100644 --- a/src/Microsoft.ML.Api/DataViewConstructionUtils.cs +++ b/src/Microsoft.ML.Api/DataViewConstructionUtils.cs @@ -125,61 +125,11 @@ private Delegate CreateGetter(int index) if (outputType.IsArray) { Ch.Assert(colType.IsVector); - // String[] -> VBuffer + // String[] -> ReadOnlyMemory if (outputType.GetElementType() == typeof(string)) { 
Ch.Assert(colType.ItemType.IsText); - return CreateConvertingArrayGetterDelegate(index, x => x == null ? DvText.NA : new DvText(x)); - } - else if (outputType.GetElementType() == typeof(int)) - { - Ch.Assert(colType.ItemType == NumberType.I4); - return CreateConvertingArrayGetterDelegate(index, x => x); - } - else if (outputType.GetElementType() == typeof(int?)) - { - Ch.Assert(colType.ItemType == NumberType.I4); - return CreateConvertingArrayGetterDelegate(index, x => x ?? DvInt4.NA); - } - else if (outputType.GetElementType() == typeof(long)) - { - Ch.Assert(colType.ItemType == NumberType.I8); - return CreateConvertingArrayGetterDelegate(index, x => x); - } - else if (outputType.GetElementType() == typeof(long?)) - { - Ch.Assert(colType.ItemType == NumberType.I8); - return CreateConvertingArrayGetterDelegate(index, x => x ?? DvInt8.NA); - } - else if (outputType.GetElementType() == typeof(short)) - { - Ch.Assert(colType.ItemType == NumberType.I2); - return CreateConvertingArrayGetterDelegate(index, x => x); - } - else if (outputType.GetElementType() == typeof(short?)) - { - Ch.Assert(colType.ItemType == NumberType.I2); - return CreateConvertingArrayGetterDelegate(index, x => x ?? DvInt2.NA); - } - else if (outputType.GetElementType() == typeof(sbyte)) - { - Ch.Assert(colType.ItemType == NumberType.I1); - return CreateConvertingArrayGetterDelegate(index, x => x); - } - else if (outputType.GetElementType() == typeof(sbyte?)) - { - Ch.Assert(colType.ItemType == NumberType.I1); - return CreateConvertingArrayGetterDelegate(index, x => x ?? DvInt1.NA); - } - else if (outputType.GetElementType() == typeof(bool)) - { - Ch.Assert(colType.ItemType.IsBool); - return CreateConvertingArrayGetterDelegate(index, x => x); - } - else if (outputType.GetElementType() == typeof(bool?)) - { - Ch.Assert(colType.ItemType.IsBool); - return CreateConvertingArrayGetterDelegate(index, x => x ?? DvBool.NA); + return CreateConvertingArrayGetterDelegate>(index, x => x != null ? 
x.AsMemory() : ReadOnlyMemory.Empty); } // T[] -> VBuffer @@ -193,7 +143,7 @@ private Delegate CreateGetter(int index) else if (colType.IsVector) { // VBuffer -> VBuffer - // REVIEW: Do we care about accomodating VBuffer -> VBuffer? + // REVIEW: Do we care about accomodating VBuffer -> ReadOnlyMemory? Ch.Assert(outputType.IsGenericType); Ch.Assert(outputType.GetGenericTypeDefinition() == typeof(VBuffer<>)); Ch.Assert(outputType.GetGenericArguments()[0] == colType.ItemType.RawType); @@ -204,70 +154,11 @@ private Delegate CreateGetter(int index) { if (outputType == typeof(string)) { - // String -> DvText + // String -> ReadOnlyMemory Ch.Assert(colType.IsText); - return CreateConvertingGetterDelegate(index, x => x == null ? DvText.NA : new DvText(x)); - } - else if (outputType == typeof(bool)) - { - // Bool -> DvBool - Ch.Assert(colType.IsBool); - return CreateConvertingGetterDelegate(index, x => x); - } - else if (outputType == typeof(bool?)) - { - // Bool? -> DvBool - Ch.Assert(colType.IsBool); - return CreateConvertingGetterDelegate(index, x => x ?? DvBool.NA); - } - else if (outputType == typeof(int)) - { - // int -> DvInt4 - Ch.Assert(colType == NumberType.I4); - return CreateConvertingGetterDelegate(index, x => x); - } - else if (outputType == typeof(int?)) - { - // int? -> DvInt4 - Ch.Assert(colType == NumberType.I4); - return CreateConvertingGetterDelegate(index, x => x ?? DvInt4.NA); - } - else if (outputType == typeof(short)) - { - // short -> DvInt2 - Ch.Assert(colType == NumberType.I2); - return CreateConvertingGetterDelegate(index, x => x); - } - else if (outputType == typeof(short?)) - { - // short? -> DvInt2 - Ch.Assert(colType == NumberType.I2); - return CreateConvertingGetterDelegate(index, x => x ?? DvInt2.NA); - } - else if (outputType == typeof(long)) - { - // long -> DvInt8 - Ch.Assert(colType == NumberType.I8); - return CreateConvertingGetterDelegate(index, x => x); - } - else if (outputType == typeof(long?)) - { - // long? 
-> DvInt8 - Ch.Assert(colType == NumberType.I8); - return CreateConvertingGetterDelegate(index, x => x ?? DvInt8.NA); - } - else if (outputType == typeof(sbyte)) - { - // sbyte -> DvInt1 - Ch.Assert(colType == NumberType.I1); - return CreateConvertingGetterDelegate(index, x => x); - } - else if (outputType == typeof(sbyte?)) - { - // sbyte? -> DvInt1 - Ch.Assert(colType == NumberType.I1); - return CreateConvertingGetterDelegate(index, x => x ?? DvInt1.NA); + return CreateConvertingGetterDelegate>(index, x => x != null ? x.AsMemory() : ReadOnlyMemory.Empty); } + // T -> T if (outputType.IsGenericType && outputType.GetGenericTypeDefinition() == typeof(Nullable<>)) Ch.Assert(colType.RawType == Nullable.GetUnderlyingType(outputType)); @@ -805,12 +696,12 @@ public override ValueGetter GetGetter() var itemType = typeT.GetElementType(); var dstItemType = typeof(TDst).GetGenericArguments()[0]; - // String[] -> VBuffer + // String[] -> VBuffer> if (itemType == typeof(string)) { - Contracts.Check(dstItemType == typeof(DvText)); + Contracts.Check(dstItemType == typeof(ReadOnlyMemory)); - ValueGetter> method = GetStringArray; + ValueGetter>> method = GetStringArray; return method as ValueGetter; } @@ -825,7 +716,7 @@ public override ValueGetter GetGetter() if (MetadataType.IsVector) { // VBuffer -> VBuffer - // REVIEW: Do we care about accomodating VBuffer -> VBuffer? + // REVIEW: Do we care about accomodating VBuffer -> VBuffer>? 
Contracts.Assert(typeT.IsGenericType); Contracts.Check(typeof(TDst).IsGenericType); @@ -845,9 +736,9 @@ public override ValueGetter GetGetter() { if (typeT == typeof(string)) { - // String -> DvText + // String -> ReadOnlyMemory Contracts.Assert(MetadataType.IsText); - ValueGetter m = GetString; + ValueGetter> m = GetString; return m as ValueGetter; } // T -> T @@ -861,14 +752,14 @@ public class TElement { } - private void GetStringArray(ref VBuffer dst) + private void GetStringArray(ref VBuffer> dst) { var value = (string[])(object)Value; var n = Utils.Size(value); - dst = new VBuffer(n, Utils.Size(dst.Values) < n ? new DvText[n] : dst.Values, dst.Indices); + dst = new VBuffer>(n, Utils.Size(dst.Values) < n ? new ReadOnlyMemory[n] : dst.Values, dst.Indices); for (int i = 0; i < n; i++) - dst.Values[i] = new DvText(value[i]); + dst.Values[i] = value[i].AsMemory(); } @@ -890,9 +781,9 @@ private ValueGetter> GetVBufferGetter() return (ref VBuffer dst) => castValue.CopyTo(ref dst); } - private void GetString(ref DvText dst) + private void GetString(ref ReadOnlyMemory dst) { - dst = new DvText((string)(object)Value); + dst = ((string)(object)Value).AsMemory(); } private void GetDirectValue(ref TDst dst) diff --git a/src/Microsoft.ML.Api/TypedCursor.cs b/src/Microsoft.ML.Api/TypedCursor.cs index 923aac5c81..50bb1cd575 100644 --- a/src/Microsoft.ML.Api/TypedCursor.cs +++ b/src/Microsoft.ML.Api/TypedCursor.cs @@ -276,61 +276,11 @@ private Action GenerateSetter(IRow input, int index, InternalSchemaDefinit if (fieldType.IsArray) { Ch.Assert(colType.IsVector); - // VBuffer -> String[] + // VBuffer> -> String[] if (fieldType.GetElementType() == typeof(string)) { Ch.Assert(colType.ItemType.IsText); - return CreateConvertingVBufferSetter(input, index, poke, peek, x => x.ToString()); - } - else if (fieldType.GetElementType() == typeof(bool)) - { - Ch.Assert(colType.ItemType.IsBool); - return CreateConvertingVBufferSetter(input, index, poke, peek, x => (bool)x); - } - else if 
(fieldType.GetElementType() == typeof(bool?)) - { - Ch.Assert(colType.ItemType.IsBool); - return CreateConvertingVBufferSetter(input, index, poke, peek, x => (bool?)x); - } - else if (fieldType.GetElementType() == typeof(int)) - { - Ch.Assert(colType.ItemType == NumberType.I4); - return CreateConvertingVBufferSetter(input, index, poke, peek, x => (int)x); - } - else if (fieldType.GetElementType() == typeof(int?)) - { - Ch.Assert(colType.ItemType == NumberType.I4); - return CreateConvertingVBufferSetter(input, index, poke, peek, x => (int?)x); - } - else if (fieldType.GetElementType() == typeof(short)) - { - Ch.Assert(colType.ItemType == NumberType.I2); - return CreateConvertingVBufferSetter(input, index, poke, peek, x => (short)x); - } - else if (fieldType.GetElementType() == typeof(short?)) - { - Ch.Assert(colType.ItemType == NumberType.I2); - return CreateConvertingVBufferSetter(input, index, poke, peek, x => (short?)x); - } - else if (fieldType.GetElementType() == typeof(long)) - { - Ch.Assert(colType.ItemType == NumberType.I8); - return CreateConvertingVBufferSetter(input, index, poke, peek, x => (long)x); - } - else if (fieldType.GetElementType() == typeof(long?)) - { - Ch.Assert(colType.ItemType == NumberType.I8); - return CreateConvertingVBufferSetter(input, index, poke, peek, x => (long?)x); - } - else if (fieldType.GetElementType() == typeof(sbyte)) - { - Ch.Assert(colType.ItemType == NumberType.I1); - return CreateConvertingVBufferSetter(input, index, poke, peek, x => (sbyte)x); - } - else if (fieldType.GetElementType() == typeof(sbyte?)) - { - Ch.Assert(colType.ItemType == NumberType.I1); - return CreateConvertingVBufferSetter(input, index, poke, peek, x => (sbyte?)x); + return CreateConvertingVBufferSetter, string>(input, index, poke, peek, x => x.ToString()); } // VBuffer -> T[] @@ -344,7 +294,7 @@ private Action GenerateSetter(IRow input, int index, InternalSchemaDefinit else if (colType.IsVector) { // VBuffer -> VBuffer - // REVIEW: Do we care about 
accomodating VBuffer -> VBuffer? + // REVIEW: Do we care about accomodating VBuffer -> VBuffer>? Ch.Assert(fieldType.IsGenericType); Ch.Assert(fieldType.GetGenericTypeDefinition() == typeof(VBuffer<>)); Ch.Assert(fieldType.GetGenericArguments()[0] == colType.ItemType.RawType); @@ -355,71 +305,12 @@ private Action GenerateSetter(IRow input, int index, InternalSchemaDefinit { if (fieldType == typeof(string)) { - // DvText -> String + // ReadOnlyMemory -> String Ch.Assert(colType.IsText); Ch.Assert(peek == null); - return CreateConvertingActionSetter(input, index, poke, x => x.ToString()); - } - else if (fieldType == typeof(bool)) - { - Ch.Assert(colType.IsBool); - Ch.Assert(peek == null); - return CreateConvertingActionSetter(input, index, poke, x => (bool)x); - } - else if (fieldType == typeof(bool?)) - { - Ch.Assert(colType.IsBool); - Ch.Assert(peek == null); - return CreateConvertingActionSetter(input, index, poke, x => (bool?)x); - } - else if (fieldType == typeof(int)) - { - Ch.Assert(colType == NumberType.I4); - Ch.Assert(peek == null); - return CreateConvertingActionSetter(input, index, poke, x => (int)x); - } - else if (fieldType == typeof(int?)) - { - Ch.Assert(colType == NumberType.I4); - Ch.Assert(peek == null); - return CreateConvertingActionSetter(input, index, poke, x => (int?)x); - } - else if (fieldType == typeof(short)) - { - Ch.Assert(colType == NumberType.I2); - Ch.Assert(peek == null); - return CreateConvertingActionSetter(input, index, poke, x => (short)x); - } - else if (fieldType == typeof(short?)) - { - Ch.Assert(colType == NumberType.I2); - Ch.Assert(peek == null); - return CreateConvertingActionSetter(input, index, poke, x => (short?)x); - } - else if (fieldType == typeof(long)) - { - Ch.Assert(colType == NumberType.I8); - Ch.Assert(peek == null); - return CreateConvertingActionSetter(input, index, poke, x => (long)x); - } - else if (fieldType == typeof(long?)) - { - Ch.Assert(colType == NumberType.I8); - Ch.Assert(peek == null); - return 
CreateConvertingActionSetter(input, index, poke, x => (long?)x); - } - else if (fieldType == typeof(sbyte)) - { - Ch.Assert(colType == NumberType.I1); - Ch.Assert(peek == null); - return CreateConvertingActionSetter(input, index, poke, x => (sbyte)x); - } - else if (fieldType == typeof(sbyte?)) - { - Ch.Assert(colType == NumberType.I1); - Ch.Assert(peek == null); - return CreateConvertingActionSetter(input, index, poke, x => (sbyte?)x); + return CreateConvertingActionSetter, string>(input, index, poke, x => x.ToString()); } + // T -> T if (fieldType.IsGenericType && fieldType.GetGenericTypeDefinition() == typeof(Nullable<>)) Ch.Assert(colType.RawType == Nullable.GetUnderlyingType(fieldType)); diff --git a/src/Microsoft.ML.Core/CommandLine/CmdParser.cs b/src/Microsoft.ML.Core/CommandLine/CmdParser.cs index de89ddc602..d5a204dd45 100644 --- a/src/Microsoft.ML.Core/CommandLine/CmdParser.cs +++ b/src/Microsoft.ML.Core/CommandLine/CmdParser.cs @@ -1,4 +1,4 @@ -////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// // Command Line Argument Parser // ---------------------------- // Usage diff --git a/src/Microsoft.ML.Core/Data/ColumnType.cs b/src/Microsoft.ML.Core/Data/ColumnType.cs index 96764d68f1..69a6505c51 100644 --- a/src/Microsoft.ML.Core/Data/ColumnType.cs +++ b/src/Microsoft.ML.Core/Data/ColumnType.cs @@ -120,47 +120,38 @@ public bool IsBool } /// - /// Whether this type is the standard timespan type. + /// Whether this type is the standard type. /// public bool IsTimeSpan { get { - if (!(this is TimeSpanType)) - return false; - // TimeSpanType is a singleton. - Contracts.Assert(this == TimeSpanType.Instance); - return true; + Contracts.Assert((this == TimeSpanType.Instance) == (this is TimeSpanType)); + return this is TimeSpanType; } } /// - /// Whether this type is a DvDateTime. + /// Whether this type is a . 
/// public bool IsDateTime { get { - if (!(this is DateTimeType)) - return false; - // DateTimeType is a singleton. - Contracts.Assert(this == DateTimeType.Instance); - return true; + Contracts.Assert((this == DateTimeType.Instance) == (this is DateTimeType)); + return this is DateTimeType; } } /// - /// Whether this type is a DvDateTimeZone. + /// Whether this type is a /// public bool IsDateTimeZone { get { - if (!(this is DateTimeZoneType)) - return false; - // DateTimeZoneType is a singleton. - Contracts.Assert(this == DateTimeZoneType.Instance); - return true; + Contracts.Assert((this == DateTimeOffsetType.Instance) == (this is DateTimeOffsetType)); + return this is DateTimeOffsetType; } } @@ -319,7 +310,7 @@ public static PrimitiveType FromKind(DataKind kind) if (kind == DataKind.DT) return DateTimeType.Instance; if (kind == DataKind.DZ) - return DateTimeZoneType.Instance; + return DateTimeOffsetType.Instance; return NumberType.FromKind(kind); } } @@ -341,7 +332,7 @@ public static TextType Instance } private TextType() - : base(typeof(DvText), DataKind.TX) + : base(typeof(ReadOnlyMemory), DataKind.TX) { } @@ -573,7 +564,7 @@ public static BoolType Instance } private BoolType() - : base(typeof(DvBool), DataKind.BL) + : base(typeof(bool), DataKind.BL) { } @@ -605,7 +596,7 @@ public static DateTimeType Instance } private DateTimeType() - : base(typeof(DvDateTime), DataKind.DT) + : base(typeof(DateTime), DataKind.DT) { } @@ -623,21 +614,21 @@ public override string ToString() } } - public sealed class DateTimeZoneType : PrimitiveType + public sealed class DateTimeOffsetType : PrimitiveType { - private static volatile DateTimeZoneType _instance; - public static DateTimeZoneType Instance + private static volatile DateTimeOffsetType _instance; + public static DateTimeOffsetType Instance { get { if (_instance == null) - Interlocked.CompareExchange(ref _instance, new DateTimeZoneType(), null); + Interlocked.CompareExchange(ref _instance, new DateTimeOffsetType(), 
null); return _instance; } } - private DateTimeZoneType() - : base(typeof(DvDateTimeZone), DataKind.DZ) + private DateTimeOffsetType() + : base(typeof(DateTimeOffset), DataKind.DZ) { } @@ -645,7 +636,7 @@ public override bool Equals(ColumnType other) { if (other == this) return true; - Contracts.Assert(!(other is DateTimeZoneType)); + Contracts.Assert(!(other is DateTimeOffsetType)); return false; } @@ -672,7 +663,7 @@ public static TimeSpanType Instance } private TimeSpanType() - : base(typeof(DvTimeSpan), DataKind.TS) + : base(typeof(TimeSpan), DataKind.TS) { } diff --git a/src/Microsoft.ML.Core/Data/DataKind.cs b/src/Microsoft.ML.Core/Data/DataKind.cs index 0249745691..ad8d8fbfe0 100644 --- a/src/Microsoft.ML.Core/Data/DataKind.cs +++ b/src/Microsoft.ML.Core/Data/DataKind.cs @@ -55,7 +55,7 @@ public enum DataKind : byte public static class DataKindExtensions { public const DataKind KindMin = DataKind.I1; - public const DataKind KindLim = DataKind.UG + 1; + public const DataKind KindLim = DataKind.U16 + 1; public const int KindCount = KindLim - KindMin; /// @@ -141,19 +141,19 @@ public static Type ToType(this DataKind kind) switch (kind) { case DataKind.I1: - return typeof(DvInt1); + return typeof(sbyte); case DataKind.U1: return typeof(byte); case DataKind.I2: - return typeof(DvInt2); + return typeof(short); case DataKind.U2: return typeof(ushort); case DataKind.I4: - return typeof(DvInt4); + return typeof(int); case DataKind.U4: return typeof(uint); case DataKind.I8: - return typeof(DvInt8); + return typeof(long); case DataKind.U8: return typeof(ulong); case DataKind.R4: @@ -161,15 +161,15 @@ public static Type ToType(this DataKind kind) case DataKind.R8: return typeof(Double); case DataKind.TX: - return typeof(DvText); + return typeof(ReadOnlyMemory); case DataKind.BL: - return typeof(DvBool); + return typeof(bool); case DataKind.TS: - return typeof(DvTimeSpan); + return typeof(TimeSpan); case DataKind.DT: - return typeof(DvDateTime); + return 
typeof(DateTime); case DataKind.DZ: - return typeof(DvDateTimeZone); + return typeof(DateTimeOffset); case DataKind.UG: return typeof(UInt128); } @@ -185,35 +185,35 @@ public static bool TryGetDataKind(this Type type, out DataKind kind) Contracts.CheckValueOrNull(type); // REVIEW: Make this more efficient. Should we have a global dictionary? - if (type == typeof(DvInt1) || type == typeof(sbyte) || type == typeof(sbyte?)) + if (type == typeof(sbyte)) kind = DataKind.I1; - else if (type == typeof(byte) || type == typeof(byte?)) + else if (type == typeof(byte)) kind = DataKind.U1; - else if (type == typeof(DvInt2)|| type== typeof(short) || type == typeof(short?)) + else if (type == typeof(short)) kind = DataKind.I2; - else if (type == typeof(ushort)|| type == typeof(ushort?)) + else if (type == typeof(ushort)) kind = DataKind.U2; - else if (type == typeof(DvInt4) || type == typeof(int)|| type == typeof(int?)) + else if (type == typeof(int)) kind = DataKind.I4; - else if (type == typeof(uint)|| type == typeof(uint?)) + else if (type == typeof(uint)) kind = DataKind.U4; - else if (type == typeof(DvInt8) || type==typeof(long)|| type == typeof(long?)) + else if (type == typeof(long)) kind = DataKind.I8; - else if (type == typeof(ulong)|| type == typeof(ulong?)) + else if (type == typeof(ulong)) kind = DataKind.U8; - else if (type == typeof(Single)|| type == typeof(Single?)) + else if (type == typeof(Single)) kind = DataKind.R4; - else if (type == typeof(Double)|| type == typeof(Double?)) + else if (type == typeof(Double)) kind = DataKind.R8; - else if (type == typeof(DvText)) + else if (type == typeof(ReadOnlyMemory) || type == typeof(string)) kind = DataKind.TX; - else if (type == typeof(DvBool) || type == typeof(bool) || type == typeof(bool?)) + else if (type == typeof(bool)) kind = DataKind.BL; - else if (type == typeof(DvTimeSpan)) + else if (type == typeof(TimeSpan)) kind = DataKind.TS; - else if (type == typeof(DvDateTime)) + else if (type == typeof(DateTime)) kind 
= DataKind.DT; - else if (type == typeof(DvDateTimeZone)) + else if (type == typeof(DateTimeOffset)) kind = DataKind.DZ; else if (type == typeof(UInt128)) kind = DataKind.UG; diff --git a/src/Microsoft.ML.Core/Data/DateTime.cs b/src/Microsoft.ML.Core/Data/DateTime.cs deleted file mode 100644 index d11be2a494..0000000000 --- a/src/Microsoft.ML.Core/Data/DateTime.cs +++ /dev/null @@ -1,550 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using Microsoft.ML.Runtime.Internal.Utilities; - -namespace Microsoft.ML.Runtime.Data -{ - using Conditional = System.Diagnostics.ConditionalAttribute; - using SysDateTime = System.DateTime; - using SysDateTimeOffset = System.DateTimeOffset; - using SysTimeSpan = System.TimeSpan; - - /// - /// A struct to represent a DateTime column type - /// - public struct DvDateTime : IEquatable, IComparable - { - public const long MaxTicks = 3155378975999999999; - private readonly DvInt8 _ticks; - - /// - /// This ctor initializes _ticks to the value of sdt.Ticks, and ignores its DateTimeKind value. - /// - public DvDateTime(SysDateTime sdt) - { - _ticks = sdt.Ticks; - AssertValid(); - } - - /// - /// This ctor accepts any value for ticks, but produces an NA if ticks is out of the legal range. - /// - public DvDateTime(DvInt8 ticks) - { - if ((ulong)ticks.RawValue > MaxTicks) - _ticks = DvInt8.NA; - else - _ticks = ticks; - AssertValid(); - } - - [Conditional("DEBUG")] - internal void AssertValid() - { - Contracts.Assert((ulong)_ticks.RawValue <= MaxTicks || _ticks.IsNA); - } - - public DvInt8 Ticks - { - get - { - AssertValid(); - return _ticks; - } - } - - // REVIEW: Add more System.DateTime members returning their corresponding 'Dv' types (task 4255). - /// - /// Gets the date component of this object. 
- /// - public DvDateTime Date - { - get - { - AssertValid(); - if (IsNA) - return NA; - return new DvDateTime(GetSysDateTime().Date); - } - } - - /// - /// Gets a DvDateTime object representing the current UTC date and time. - /// - public static DvDateTime UtcNow { get { return new DvDateTime(SysDateTime.UtcNow); } } - - public bool IsNA - { - get - { - AssertValid(); - return (ulong)_ticks.RawValue > MaxTicks; - } - } - - public static DvDateTime NA - { - get { return new DvDateTime(DvInt8.NA); } - } - - public static explicit operator SysDateTime?(DvDateTime dvDt) - { - if (dvDt.IsNA) - return null; - return dvDt.GetSysDateTime(); - } - - /// - /// Creates a new DvDateTime with the same number of ticks as in sdt, ignoring its DateTimeKind value. - /// - public static implicit operator DvDateTime(SysDateTime sdt) - { - return new DvDateTime(sdt); - } - - public static implicit operator DvDateTime(SysDateTime? sdt) - { - if (sdt == null) - return DvDateTime.NA; - return new DvDateTime(sdt.Value); - } - - public override string ToString() - { - AssertValid(); - if (IsNA) - return ""; - return GetSysDateTime().ToString("o"); - } - - internal SysDateTime GetSysDateTime() - { - AssertValid(); - Contracts.Assert(!IsNA); - return new SysDateTime(_ticks.RawValue); - } - - public bool Equals(DvDateTime other) - { - return _ticks.RawValue == other._ticks.RawValue; - } - - public override bool Equals(object obj) - { - return obj is DvDateTime && Equals((DvDateTime)obj); - } - - public int CompareTo(DvDateTime other) - { - if (_ticks.RawValue == other._ticks.RawValue) - return 0; - return _ticks.RawValue < other._ticks.RawValue ? -1 : 1; - } - - public override int GetHashCode() - { - return _ticks.GetHashCode(); - } - } - - /// - /// A struct to represent a DateTimeZone column type. 
- /// - public struct DvDateTimeZone : IEquatable, IComparable - { - public const long TicksPerMinute = 600000000; - public const long MaxMinutesOffset = 840; - public const long MinMinutesOffset = -840; - - // Stores the UTC date-time (convert to clock time by adding the offset). - private readonly DvDateTime _dateTime; - // Store the offset in minutes. - private readonly DvInt2 _offset; - - // This assumes (and asserts) that the dt/offset combination is valid. - // Callers should do the validation. - private DvDateTimeZone(DvDateTime dt, DvInt2 offset) - { - _dateTime = dt; - _offset = offset; - AssertValid(); - } - - /// - /// Given a number of ticks for the date time portion and a number of minutes for - /// the time zone offset, this constructs a new DvDateTimeZone. If anything is invalid, - /// it produces NA. - /// - /// The number of clock ticks in the date time portion - /// The time zone offset in minutes - public DvDateTimeZone(DvInt8 ticks, DvInt2 offset) - { - var dt = new DvDateTime(ticks); - if (dt.IsNA || offset.IsNA || MinMinutesOffset > offset.RawValue || offset.RawValue > MaxMinutesOffset) - { - _dateTime = DvDateTime.NA; - _offset = DvInt2.NA; - } - else - { - _offset = offset; - _dateTime = ValidateDate(dt, ref _offset); - } - AssertValid(); - } - - public DvDateTimeZone(SysDateTimeOffset dto) - { - // Since it is constructed from a SysDateTimeOffset, all the validations should work. - var success = TryValidateOffset(dto.Offset.Ticks, out _offset); - Contracts.Assert(success); - _dateTime = ValidateDate(new DvDateTime(dto.DateTime), ref _offset); - Contracts.Assert(!_dateTime.IsNA); - Contracts.Assert(!_offset.IsNA); - AssertValid(); - } - - /// - /// Constructs a DvDateTimeZone from a clock date-time and a time zone offset from UTC. 
- /// - /// The clock time - /// The offset - public DvDateTimeZone(DvDateTime dt, DvTimeSpan offset) - { - if (dt.IsNA || offset.IsNA || !TryValidateOffset(offset.Ticks, out _offset)) - { - _dateTime = DvDateTime.NA; - _offset = DvInt2.NA; - } - else - _dateTime = ValidateDate(dt, ref _offset); - AssertValid(); - } - - /// - /// This method takes a DvDateTime representing clock time, and a TimeSpan representing an offset, - /// validates that both the clock time and the UTC time (which is the clock time minus the offset) - /// are within the valid range, and returns a DvDateTime representing the UTC time (dateTime-offset). - /// - /// The clock time - /// The offset. This value is assumed to be validated as a legal offset: - /// a value in whole minutes, between -14 and 14 hours. - /// The UTC DvDateTime representing the input clock time minus the offset - private static DvDateTime ValidateDate(DvDateTime dateTime, ref DvInt2 offset) - { - Contracts.Assert(!dateTime.IsNA); - Contracts.Assert(!offset.IsNA); - - // Validate that both the UTC and clock times are legal. - Contracts.Assert(MinMinutesOffset <= offset.RawValue && offset.RawValue <= MaxMinutesOffset); - var offsetTicks = offset.RawValue * TicksPerMinute; - // This operation cannot overflow because offset should have already been validated to be within - // 14 hours and the DateTime instance is more than that distance from the boundaries of Int64. - long utcTicks = dateTime.Ticks.RawValue - offsetTicks; - var dvdt = new DvDateTime(utcTicks); - if (dvdt.IsNA) - offset = DvInt2.NA; - return dvdt; - } - - /// - /// This method takes a TimeSpan offset, validates that it is a legal offset for DvDateTimeZone (i.e. - /// in whole minutes, and between -14 and 14 hours), and returns the offset in number of minutes. 
- /// - /// - /// - /// - private static bool TryValidateOffset(DvInt8 offsetTicks, out DvInt2 offset) - { - if (offsetTicks.IsNA || offsetTicks.RawValue % TicksPerMinute != 0) - { - offset = DvInt2.NA; - return false; - } - - long mins = offsetTicks.RawValue / TicksPerMinute; - short res = (short)mins; - if (res != mins || res > MaxMinutesOffset || res < MinMinutesOffset) - { - offset = DvInt2.NA; - return false; - } - offset = res; - Contracts.Assert(!offset.IsNA); - return true; - } - - [Conditional("DEBUG")] - private void AssertValid() - { - _dateTime.AssertValid(); - if (_dateTime.IsNA) - Contracts.Assert(_offset.IsNA); - else - { - Contracts.Assert(MinMinutesOffset <= _offset.RawValue && _offset.RawValue <= MaxMinutesOffset); - Contracts.Assert((ulong)(_dateTime.Ticks.RawValue + _offset.RawValue * TicksPerMinute) - <= (ulong)DvDateTime.MaxTicks); - } - } - - public DvDateTime ClockDateTime - { - get - { - AssertValid(); - if (_dateTime.IsNA) - return DvDateTime.NA; - var res = new DvDateTime(_dateTime.Ticks.RawValue + _offset.RawValue * TicksPerMinute); - Contracts.Assert(!res.IsNA); - return res; - } - } - - /// - /// Gets the UTC date and time. - /// - public DvDateTime UtcDateTime - { - get - { - AssertValid(); - if (IsNA) - return DvDateTime.NA; - return _dateTime; - } - } - - /// - /// Gets the offset as a time span. - /// - public DvTimeSpan Offset - { - get - { - AssertValid(); - if (_offset.IsNA) - return DvTimeSpan.NA; - return new DvTimeSpan(_offset.RawValue * TicksPerMinute); - } - } - - /// - /// Gets the offset in minutes. - /// - public DvInt2 OffsetMinutes - { - get - { - AssertValid(); - return _offset; - } - } - - // REVIEW: Add more System.DateTimeOffset members returning their corresponding 'Dv' types (task 4255). - - /// - /// Gets the date component of the ClockDateTime. 
- /// - public DvDateTime ClockDate - { - get - { - AssertValid(); - if (IsNA) - return DvDateTime.NA; - return ClockDateTime.Date; - } - } - - /// - /// Gets the date component of the UtcDateTime. - /// - public DvDateTime UtcDate - { - get - { - AssertValid(); - if (IsNA) - return DvDateTime.NA; - return _dateTime.Date; - } - } - - /// - /// Gets a DvDateTimeZone object representing the current UTC date and time (with offset=0). - /// - public static DvDateTimeZone UtcNow { get { return new DvDateTimeZone(SysDateTimeOffset.UtcNow); } } - - public bool IsNA - { - get - { - AssertValid(); - return _dateTime.IsNA; - } - } - - // The missing value for DvDateTimeZone is represented by a DvDateTimeZone with _dateTime = DvDateTime.NA - // and _offset = 0. - public static DvDateTimeZone NA - { - get { return new DvDateTimeZone(DvDateTime.NA, DvInt2.NA); } - } - - public static explicit operator SysDateTimeOffset?(DvDateTimeZone dvDto) - { - if (dvDto.IsNA) - return null; - return dvDto.GetSysDateTimeOffset(); - } - - public static implicit operator DvDateTimeZone(SysDateTimeOffset sdto) - { - return new DvDateTimeZone(sdto); - } - - public static implicit operator DvDateTimeZone(SysDateTimeOffset? sdto) - { - if (sdto == null) - return DvDateTimeZone.NA; - return new DvDateTimeZone(sdto.Value); - } - - public override string ToString() - { - AssertValid(); - if (IsNA) - return ""; - - return GetSysDateTimeOffset().ToString("o"); - } - - private DateTimeOffset GetSysDateTimeOffset() - { - AssertValid(); - Contracts.Assert(!IsNA); - return new SysDateTimeOffset(ClockDateTime.GetSysDateTime(), new TimeSpan(0, _offset.RawValue, 0)); - } - - /// - /// Compare two values for equality. Note that this differs from System.DateTimeOffset's - /// definition of Equals, which only compares the UTC values, not the offsets. 
- /// - public bool Equals(DvDateTimeZone other) - { - return _offset.RawValue == other._offset.RawValue && _dateTime.Equals(other._dateTime); - } - - public override bool Equals(object obj) - { - return obj is DvDateTimeZone && Equals((DvDateTimeZone)obj); - } - - /// - /// Compare two values for ordering. Note that this differs from System.DateTimeOffset's - /// definition of CompareTo, which only compares the UTC values, not the offsets. - /// - public int CompareTo(DvDateTimeZone other) - { - AssertValid(); - other.AssertValid(); - - int res = _dateTime.CompareTo(other._dateTime); - if (res != 0) - return res; - if (_offset.RawValue == other._offset.RawValue) - return 0; - return _offset.RawValue < other._offset.RawValue ? -1 : 1; - } - - public override int GetHashCode() - { - return Hashing.CombineHash(_dateTime.GetHashCode(), _offset.GetHashCode()); - } - } - - /// - /// A struct to represent a DateTime column type - /// - public struct DvTimeSpan : IEquatable, IComparable - { - private readonly DvInt8 _ticks; - - public DvInt8 Ticks { get { return _ticks; } } - - public DvTimeSpan(DvInt8 ticks) - { - _ticks = ticks; - } - - public DvTimeSpan(SysTimeSpan sts) - { - _ticks = sts.Ticks; - } - - public DvTimeSpan(SysTimeSpan? sts) - { - _ticks = sts != null ? sts.GetValueOrDefault().Ticks : DvInt8.NA; - } - - public bool IsNA - { - get { return _ticks.IsNA; } - } - - public static DvTimeSpan NA - { - get { return new DvTimeSpan(DvInt8.NA); } - } - - public static explicit operator SysTimeSpan?(DvTimeSpan ts) - { - if (ts.IsNA) - return null; - return new SysTimeSpan(ts._ticks.RawValue); - } - - public static implicit operator DvTimeSpan(SysTimeSpan sts) - { - return new DvTimeSpan(sts); - } - - public static implicit operator DvTimeSpan(SysTimeSpan? 
sts) - { - return new DvTimeSpan(sts); - } - - public override string ToString() - { - if (IsNA) - return ""; - return new SysTimeSpan(_ticks.RawValue).ToString("c"); - } - - public bool Equals(DvTimeSpan other) - { - return _ticks.RawValue == other._ticks.RawValue; - } - - public override bool Equals(object obj) - { - return obj is DvTimeSpan && Equals((DvTimeSpan)obj); - } - - public int CompareTo(DvTimeSpan other) - { - if (_ticks.RawValue == other._ticks.RawValue) - return 0; - return _ticks.RawValue < other._ticks.RawValue ? -1 : 1; - } - - public override int GetHashCode() - { - return _ticks.GetHashCode(); - } - } -} diff --git a/src/Microsoft.ML.Core/Data/DvBool.cs b/src/Microsoft.ML.Core/Data/DvBool.cs deleted file mode 100644 index f17cb596d4..0000000000 --- a/src/Microsoft.ML.Core/Data/DvBool.cs +++ /dev/null @@ -1,226 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Runtime.CompilerServices; - -namespace Microsoft.ML.Runtime.Data -{ - using BL = DvBool; - using R4 = Single; - using R8 = Double; - - public struct DvBool : IEquatable, IComparable - { - private const byte _false = 0; - private const byte _true = 1; - private const byte _na = 128; - public const byte RawNA = _na; - - private byte _value; - - public static BL False { get { BL res; res._value = _false; return res; } } - public static BL True { get { BL res; res._value = _true; return res; } } - public static BL NA { get { BL res; res._value = _na; return res; } } - - /// - /// Property to return the raw value. - /// - public byte RawValue - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return _value; } - } - - /// - /// Static method to return the raw value. This is more convenient than the - /// property in code-generation scenarios. 
- /// - public static byte GetRawBits(BL a) - { - return a._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private DvBool(int value) - { - Contracts.Assert(value == _true || value == _false || value == _na); - _value = (byte)value; - } - - /// - /// Returns whether this value is false. - /// - public bool IsFalse - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return _value == _false; } - } - - /// - /// Returns whether this value is true. - /// - public bool IsTrue - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return _value == _true; } - } - - /// - /// Returns whether this value is NA. - /// - public bool IsNA - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return _value > _true; } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator BL(bool value) - { - BL res; - res._value = value ? _true : _false; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator BL(bool? value) - { - BL res; - res._value = value == null ? _na : value.GetValueOrDefault() ? 
_true : _false; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator bool(BL value) - { - switch (value._value) - { - case _false: - return false; - case _true: - return true; - default: - throw Contracts.ExceptValue(nameof(value), "NA cast to bool"); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator bool?(BL value) - { - switch (value._value) - { - case _false: - return false; - case _true: - return true; - default: - return null; - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator R4(BL value) - { - if (value._value <= _true) - return value._value; - return Single.NaN; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator R8(BL value) - { - if (value._value <= _true) - return value._value; - return Double.NaN; - } - - public override int GetHashCode() - { - return _value.GetHashCode(); - } - - public override bool Equals(object obj) - { - if (obj is BL) - return _value == ((BL)obj)._value; - return false; - } - - public bool Equals(BL other) - { - // Note that if one or both are "non-standard" NA values, this - // could return false. Theoretically, that should never happen, - // but unsafe code could cause it. - return _value == other._value; - } - - public int CompareTo(BL other) - { - // Note that if one or both are "non-standard" NA values, this could produce unexpected comparisons. - // Theoretically, that should never happen, but unsafe code could cause it. - Contracts.Assert(unchecked((sbyte)RawNA) < (sbyte)_false); - if (_value == other._value) - return 0; - return (sbyte)_value < (sbyte)other._value ? -1 : 1; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator ==(BL a, BL b) - { - if (a._value <= _true && b._value <= _true) - return a._value == b._value ? 
True : False; - return NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator !=(BL a, BL b) - { - if (a._value <= _true && b._value <= _true) - return a._value != b._value ? True : False; - return NA; - } - - public override string ToString() - { - if (_value == _false) - return "False"; - if (_value == _true) - return "True"; - return "NA"; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator !(BL a) - { - if (a._value <= _true) - a._value ^= 1; - return a; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator |(BL a, BL b) - { - if (a._value == _true) - return a; - if (b._value == _true) - return b; - if (a._value != _false) - return a; - return b; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator &(BL a, BL b) - { - if (a._value == _false) - return a; - if (b._value == _false) - return b; - if (a._value != _true) - return a; - return b; - } - } -} diff --git a/src/Microsoft.ML.Core/Data/DvInt1.cs b/src/Microsoft.ML.Core/Data/DvInt1.cs deleted file mode 100644 index ced2a4688d..0000000000 --- a/src/Microsoft.ML.Core/Data/DvInt1.cs +++ /dev/null @@ -1,264 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Runtime.CompilerServices; - -namespace Microsoft.ML.Runtime.Data -{ - using BL = DvBool; - using I2 = DvInt2; - using I4 = DvInt4; - using I8 = DvInt8; - using IX = DvInt1; - using R4 = Single; - using R8 = Double; - using RawI8 = Int64; - using RawIX = SByte; - - public struct DvInt1 : IEquatable, IComparable - { - public const RawIX RawNA = RawIX.MinValue; - - // Ideally this would be readonly. However, note that this struct has no - // ctor, but instead only has conversion operators. 
The implicit conversion - // operator from RawIX to DvIX performs better than an equivalent ctor, - // and the conversion operator must assign the _value field. - private RawIX _value; - - /// - /// Property to return the raw value. - /// - public RawIX RawValue - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return _value; } - } - - /// - /// Static method to return the raw value. This is more convenient than the - /// property in code-generation scenarios. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static RawIX GetRawBits(IX a) - { - return a._value; - } - - public static IX NA - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return RawNA; } - } - - public bool IsNA - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return _value == RawNA; } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(RawIX value) - { - IX res; - res._value = value; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(RawIX? value) - { - IX res; - res._value = value ?? 
RawNA; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator RawIX(IX value) - { - if (value._value == RawNA) - throw Contracts.ExceptValue(nameof(value), "NA cast to sbyte"); - return value._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator RawIX?(IX value) - { - if (value._value == RawNA) - return null; - return value._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(BL a) - { - if (a.IsNA) - return RawNA; - return (RawIX)a.RawValue; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(I2 a) - { - RawIX res = (RawIX)a.RawValue; - if (res != a.RawValue) - return RawNA; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(I4 a) - { - RawIX res = (RawIX)a.RawValue; - if (res != a.RawValue) - return RawNA; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(I8 a) - { - RawIX res = (RawIX)a.RawValue; - if (res != a.RawValue) - return RawNA; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(R4 a) - { - return (IX)(R8)a; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator R4(IX a) - { - if (a._value == RawNA) - return R4.NaN; - return (R4)a._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(R8 a) - { - const R8 lim = -(R8)RawIX.MinValue; - if (-lim < a && a < lim) - { - RawIX n = (RawIX)a; -#if DEBUG - Contracts.Assert(!a.IsNA()); - Contracts.Assert(n != RawNA); - RawI8 nn = (RawI8)a; - Contracts.Assert(nn == n); - if (a >= 0) - Contracts.Assert(a - 1 < n & n <= a); - else - Contracts.Assert(a <= n & n < a + 1); -#endif - return n; - } - - return RawNA; - } - - 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator R8(IX a) - { - if (a._value == RawNA) - return R8.NaN; - return (R8)a._value; - } - - public override int GetHashCode() - { - return _value.GetHashCode(); - } - - public override bool Equals(object obj) - { - if (obj is IX) - return _value == ((IX)obj)._value; - return false; - } - - public bool Equals(IX other) - { - return _value == other._value; - } - - public int CompareTo(IX other) - { - if (_value == other._value) - return 0; - return _value < other._value ? -1 : 1; - } - - public override string ToString() - { - if (_value == RawNA) - return "NA"; - return _value.ToString(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator ==(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av == bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator !=(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av != bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator <(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av < bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator <=(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av <= bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator >=(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av >= bv ? 
BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator >(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av > bv ? BL.True : BL.False; - return BL.NA; - } - } -} diff --git a/src/Microsoft.ML.Core/Data/DvInt2.cs b/src/Microsoft.ML.Core/Data/DvInt2.cs deleted file mode 100644 index 33599f6468..0000000000 --- a/src/Microsoft.ML.Core/Data/DvInt2.cs +++ /dev/null @@ -1,263 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Runtime.CompilerServices; - -namespace Microsoft.ML.Runtime.Data -{ - using BL = DvBool; - using I1 = DvInt1; - using I4 = DvInt4; - using I8 = DvInt8; - using IX = DvInt2; - using R4 = Single; - using R8 = Double; - using RawI8 = Int64; - using RawIX = Int16; - - public struct DvInt2 : IEquatable, IComparable - { - public const RawIX RawNA = RawIX.MinValue; - - // Ideally this would be readonly. However, note that this struct has no - // ctor, but instead only has conversion operators. The implicit conversion - // operator from RawIX to DvIX performs better than an equivalent ctor, - // and the conversion operator must assign the _value field. - private RawIX _value; - - /// - /// Property to return the raw value. - /// - public RawIX RawValue - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return _value; } - } - - /// - /// Static method to return the raw value. This is more convenient than the - /// property in code-generation scenarios. 
- /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static RawIX GetRawBits(IX a) - { - return a._value; - } - - public static IX NA - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return RawNA; } - } - - public bool IsNA - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return _value == RawNA; } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(RawIX value) - { - IX res; - res._value = value; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(RawIX? value) - { - IX res; - res._value = value ?? RawNA; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator RawIX(IX value) - { - if (value._value == RawNA) - throw Contracts.ExceptValue(nameof(value), "NA cast to short"); - return value._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator RawIX?(IX value) - { - if (value._value == RawNA) - return null; - return value._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(BL a) - { - if (a.IsNA) - return RawNA; - return (RawIX)a.RawValue; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(I1 a) - { - if (a.IsNA) - return RawNA; - return (RawIX)a.RawValue; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(I4 a) - { - RawIX res = (RawIX)a.RawValue; - if (res != a.RawValue) - return RawNA; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(I8 a) - { - RawIX res = (RawIX)a.RawValue; - if (res != a.RawValue) - return RawNA; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(R4 a) - { - return (IX)(R8)a; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public 
static explicit operator R4(IX a) - { - if (a._value == RawNA) - return R4.NaN; - return (R4)a._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(R8 a) - { - const R8 lim = -(R8)RawIX.MinValue; - if (-lim < a && a < lim) - { - RawIX n = (RawIX)a; -#if DEBUG - Contracts.Assert(!a.IsNA()); - Contracts.Assert(n != RawNA); - RawI8 nn = (RawI8)a; - Contracts.Assert(nn == n); - if (a >= 0) - Contracts.Assert(a - 1 < n & n <= a); - else - Contracts.Assert(a <= n & n < a + 1); -#endif - return n; - } - - return RawNA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator R8(IX a) - { - if (a._value == RawNA) - return R8.NaN; - return (R8)a._value; - } - - public override int GetHashCode() - { - return _value.GetHashCode(); - } - - public override bool Equals(object obj) - { - if (obj is IX) - return _value == ((IX)obj)._value; - return false; - } - - public bool Equals(IX other) - { - return _value == other._value; - } - - public int CompareTo(IX other) - { - if (_value == other._value) - return 0; - return _value < other._value ? -1 : 1; - } - - public override string ToString() - { - if (_value == RawNA) - return "NA"; - return _value.ToString(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator ==(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av == bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator !=(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av != bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator <(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av < bv ? 
BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator <=(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av <= bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator >=(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av >= bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator >(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av > bv ? BL.True : BL.False; - return BL.NA; - } - } -} diff --git a/src/Microsoft.ML.Core/Data/DvInt4.cs b/src/Microsoft.ML.Core/Data/DvInt4.cs deleted file mode 100644 index 23c7e89242..0000000000 --- a/src/Microsoft.ML.Core/Data/DvInt4.cs +++ /dev/null @@ -1,456 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Runtime.CompilerServices; - -namespace Microsoft.ML.Runtime.Data -{ - using BL = DvBool; - using I1 = DvInt1; - using I2 = DvInt2; - using I8 = DvInt8; - using IX = DvInt4; - using R4 = Single; - using R8 = Double; - using RawI8 = Int64; - using RawIX = Int32; - - public struct DvInt4 : IEquatable, IComparable - { - public const RawIX RawNA = RawIX.MinValue; - - // Ideally this would be readonly. However, note that this struct has no - // ctor, but instead only has conversion operators. The implicit conversion - // operator from RawIX to DvIX performs better than an equivalent ctor, - // and the conversion operator must assign the _value field. - private RawIX _value; - - /// - /// Property to return the raw value. 
- /// - public RawIX RawValue - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return _value; } - } - - /// - /// Static method to return the raw value. This is more convenient than the - /// property in code-generation scenarios. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static RawIX GetRawBits(IX a) - { - return a._value; - } - - public static IX NA - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return RawNA; } - } - - public bool IsNA - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return _value == RawNA; } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(RawIX value) - { - IX res; - res._value = value; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(RawIX? value) - { - IX res; - res._value = value ?? RawNA; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator RawIX(IX value) - { - if (value._value == RawNA) - throw Contracts.ExceptValue(nameof(value), "NA cast to int"); - return value._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator RawIX?(IX value) - { - if (value._value == RawNA) - return null; - return value._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(BL a) - { - if (a.IsNA) - return RawNA; - return (RawIX)a.RawValue; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(I1 a) - { - if (a.IsNA) - return RawNA; - return (RawIX)a.RawValue; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(I2 a) - { - if (a.IsNA) - return RawNA; - return (RawIX)a.RawValue; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(I8 a) - { - RawIX res = (RawIX)a.RawValue; - if (res != a.RawValue) - return 
RawNA; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(R4 a) - { - return (IX)(R8)a; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator R4(IX a) - { - if (a._value == RawNA) - return R4.NaN; - return (R4)a._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(R8 a) - { - const R8 lim = -(R8)RawIX.MinValue; - if (-lim < a && a < lim) - { - RawIX n = (RawIX)a; -#if DEBUG - Contracts.Assert(!a.IsNA()); - Contracts.Assert(n != RawNA); - RawI8 nn = (RawI8)a; - Contracts.Assert(nn == n); - if (a >= 0) - Contracts.Assert(a - 1 < n & n <= a); - else - Contracts.Assert(a <= n & n < a + 1); -#endif - return n; - } - - return RawNA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator R8(IX a) - { - if (a._value == RawNA) - return R8.NaN; - return (R8)a._value; - } - - public override int GetHashCode() - { - return _value.GetHashCode(); - } - - public override bool Equals(object obj) - { - if (obj is IX) - return _value == ((IX)obj)._value; - return false; - } - - public bool Equals(IX other) - { - return _value == other._value; - } - - public int CompareTo(IX other) - { - if (_value == other._value) - return 0; - return _value < other._value ? -1 : 1; - } - - public override string ToString() - { - if (_value == RawNA) - return "NA"; - return _value.ToString(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator ==(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av == bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator !=(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av != bv ? 
BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator <(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av < bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator <=(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av <= bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator >=(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av >= bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator >(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av > bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX operator -(IX a) - { - return -a._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX operator +(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - { - var res = av + bv; - // Overflow happens iff the sign of the result is different than both source values. - if ((av ^ res) >= 0) - return res; - if ((bv ^ res) >= 0) - return res; - } - return RawNA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX operator -(IX a, IX b) - { - var av = a._value; - var bv = -b._value; - if (av != RawNA && bv != RawNA) - { - var res = av + bv; - // Overflow happens iff the sign of the result is different than both source values. 
- if ((av ^ res) >= 0) - return res; - if ((bv ^ res) >= 0) - return res; - } - return RawNA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX operator *(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - { - RawI8 res = (RawI8)av * bv; - if (-RawIX.MaxValue <= res && res <= RawIX.MaxValue) - return (RawIX)res; - } - return RawNA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX operator /(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA && bv != 0) - return av / bv; - return RawNA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX operator %(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA && bv != 0) - return av % bv; - return RawNA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX Abs(IX a) - { - // Can't use Math.Abs since it throws on the RawNA value. - return a._value >= 0 ? a._value : -a._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX Sign(IX a) - { - var val = a._value; - var neg = -val; - // This works for NA since -RawNA == RawNA. - return val > neg ? +1 : val < neg ? -1 : val; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX Min(IX a, IX b) - { - var v1 = a._value; - var v2 = b._value; - // This works for NA since RawNA == RawIX.MinValue. - return v1 <= v2 ? v1 : v2; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public IX Min(IX b) - { - var v1 = _value; - var v2 = b._value; - // This works for NA since RawNA == RawIX.MinValue. - return v1 <= v2 ? v1 : v2; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX Max(IX a, IX b) - { - var v1 = a._value; - var v2 = b._value; - // This works for NA since RawNA - 1 == RawIX.MaxValue. - return v1 - 1 >= v2 - 1 ? 
v1 : v2; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public IX Max(IX b) - { - var v1 = _value; - var v2 = b._value; - // This works for NA since RawNA - 1 == RawIX.MaxValue. - return v1 - 1 >= v2 - 1 ? v1 : v2; - } - - /// - /// Raise a to the b power. Special cases: - /// * 1^NA => 1 - /// * NA^0 => 1 - /// - public static IX Pow(IX a, IX b) - { - var av = a.RawValue; - var bv = b.RawValue; - - if (av == 1) - return 1; - switch (bv) - { - case 0: - return 1; - case 1: - return av; - case 2: - return a * a; - case RawNA: - return RawNA; - } - if (av == -1) - return (bv & 1) == 0 ? 1 : -1; - if (bv < 0) - return RawNA; - if (av == RawNA) - return RawNA; - - // Since the abs of the base is at least two, the exponent must be less than 31. - if (bv >= 31) - return RawNA; - - bool neg = false; - if (av < 0) - { - av = -av; - neg = (bv & 1) != 0; - } - Contracts.Assert(av >= 2); - - // Since the exponent is at least three, the base must be <= 1290. - Contracts.Assert(bv >= 3); - if (av > 1290) - return RawNA; - - // REVIEW: Should we use a checked context and exception catching like I8 does? - ulong u = (ulong)(uint)av; - ulong result = 1; - for (; ; ) - { - if ((bv & 1) != 0 && (result *= u) > RawIX.MaxValue) - return RawNA; - bv >>= 1; - if (bv == 0) - break; - if ((u *= u) > RawIX.MaxValue) - return RawNA; - } - Contracts.Assert(result <= RawIX.MaxValue); - - var res = (RawIX)result; - if (neg) - res = -res; - return res; - } - } -} diff --git a/src/Microsoft.ML.Core/Data/DvInt8.cs b/src/Microsoft.ML.Core/Data/DvInt8.cs deleted file mode 100644 index 3212e21fa6..0000000000 --- a/src/Microsoft.ML.Core/Data/DvInt8.cs +++ /dev/null @@ -1,511 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -using System; -using System.Runtime.CompilerServices; - -namespace Microsoft.ML.Runtime.Data -{ - using BL = DvBool; - using I1 = DvInt1; - using I2 = DvInt2; - using I4 = DvInt4; - using IX = DvInt8; - using R4 = Single; - using R8 = Double; - using RawIX = Int64; - - public struct DvInt8 : IEquatable, IComparable - { - public const RawIX RawNA = RawIX.MinValue; - - // Ideally this would be readonly. However, note that this struct has no - // ctor, but instead only has conversion operators. The implicit conversion - // operator from RawIX to DvIX performs better than an equivalent ctor, - // and the conversion operator must assign the _value field. - private RawIX _value; - - /// - /// Property to return the raw value. - /// - public RawIX RawValue - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return _value; } - } - - /// - /// Static method to return the raw value. This is more convenient than the - /// property in code-generation scenarios. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static RawIX GetRawBits(IX a) - { - return a._value; - } - - public static IX NA - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return RawNA; } - } - - public bool IsNA - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get { return _value == RawNA; } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(RawIX value) - { - IX res; - res._value = value; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(RawIX? value) - { - IX res; - res._value = value ?? 
RawNA; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator RawIX(IX value) - { - if (value._value == RawNA) - throw Contracts.ExceptValue(nameof(value), "NA cast to long"); - return value._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator RawIX?(IX value) - { - if (value._value == RawNA) - return null; - return value._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(BL a) - { - if (a.IsNA) - return RawNA; - return (RawIX)a.RawValue; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(I1 a) - { - if (a.IsNA) - return RawNA; - return (RawIX)a.RawValue; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(I2 a) - { - if (a.IsNA) - return RawNA; - return (RawIX)a.RawValue; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static implicit operator IX(I4 a) - { - if (a.IsNA) - return RawNA; - return (RawIX)a.RawValue; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(R4 a) - { - return (IX)(R8)a; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator R4(IX a) - { - if (a._value == RawNA) - return R4.NaN; - return (R4)a._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator IX(R8 a) - { - const R8 lim = -(R8)RawIX.MinValue; - if (-lim < a && a < lim) - { - RawIX n = (RawIX)a; -#if DEBUG - Contracts.Assert(!a.IsNA()); - Contracts.Assert(n != RawNA); - // Note that an R8 cannot represent long.MaxValue exactly so y + 1.0 below might be the same as y. 
- R8 x = a; - R8 y = n; - if (a < 0) - { - x = -x; - y = -y; - } - Contracts.Assert(y <= x); - Contracts.Assert(x < y + 1.0 | y + 1.0 == y & x == y); -#endif - return n; - } - - return RawNA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static explicit operator R8(IX a) - { - if (a._value == RawNA) - return R8.NaN; - return (R8)a._value; - } - - public override int GetHashCode() - { - return _value.GetHashCode(); - } - - public override bool Equals(object obj) - { - if (obj is IX) - return _value == ((IX)obj)._value; - return false; - } - - public bool Equals(IX other) - { - return _value == other._value; - } - - public int CompareTo(IX other) - { - if (_value == other._value) - return 0; - return _value < other._value ? -1 : 1; - } - - public override string ToString() - { - if (_value == RawNA) - return "NA"; - return _value.ToString(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator ==(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av == bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator !=(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av != bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator <(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av < bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator <=(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av <= bv ? 
BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator >=(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av >= bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL operator >(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - return av > bv ? BL.True : BL.False; - return BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX operator -(IX a) - { - return -a._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX operator +(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA) - { - var res = av + bv; - // Overflow happens iff the sign of the result is different than both source values. - if ((av ^ res) >= 0) - return res; - if ((bv ^ res) >= 0) - return res; - } - return RawNA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX operator -(IX a, IX b) - { - var av = a._value; - var bv = -b._value; - if (av != RawNA && bv != RawNA) - { - var res = av + bv; - // Overflow happens iff the sign of the result is different than both source values. - if ((av ^ res) >= 0) - return res; - if ((bv ^ res) >= 0) - return res; - } - return RawNA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX operator *(IX a, IX b) - { - var av = a._value; - var bv = b._value; - bool neg = (av ^ bv) < 0; - if (av < 0) - { - if (av == RawNA) - return RawNA; - av = -av; - } - if (bv < 0) - { - if (bv == RawNA) - return RawNA; - bv = -bv; - } - - // Deal with the low 32 bits. - ulong lo1 = (ulong)av & 0x00000000FFFFFFFF; - ulong lo2 = (ulong)bv & 0x00000000FFFFFFFF; - RawIX res = (RawIX)(lo1 * lo2); - if (res < 0) - return RawNA; - - // Get the high 32 bits, including cross terms. 
- ulong hi1 = (ulong)av >> 32; - ulong hi2 = (ulong)bv >> 32; - if (hi1 != 0) - { - // If both high words are non-zero, overflow is guaranteed. - if (hi2 != 0) - return RawNA; - // Compute the cross term. - ulong tmp = hi1 * lo2; - if ((tmp & 0xFFFFFFFF80000000) != 0) - return RawNA; - res += (long)(tmp << 32); - if (res < 0) - return RawNA; - } - else if (hi2 != 0) - { - // Compute the cross term. - ulong tmp = hi2 * lo1; - if ((tmp & 0xFFFFFFFF80000000) != 0) - return RawNA; - res += (long)(tmp << 32); - if (res < 0) - return RawNA; - } - - // Adjust the sign. - if (neg) - res = -res; - return res; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX operator /(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA && bv != 0) - return av / bv; - return RawNA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX operator %(IX a, IX b) - { - var av = a._value; - var bv = b._value; - if (av != RawNA && bv != RawNA && bv != 0) - return av % bv; - return RawNA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX Abs(IX a) - { - // Can't use Math.Abs since it throws on the RawNA value. - return a._value >= 0 ? a._value : -a._value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX Sign(IX a) - { - var val = a._value; - var neg = -val; - // This works for NA since -RawNA == RawNA. - return val > neg ? +1 : val < neg ? -1 : val; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX Min(IX a, IX b) - { - var v1 = a._value; - var v2 = b._value; - // This works for NA since RawNA == RawIX.MinValue. - return v1 <= v2 ? v1 : v2; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public IX Min(IX b) - { - var v1 = _value; - var v2 = b._value; - // This works for NA since RawNA == RawIX.MinValue. - return v1 <= v2 ? 
v1 : v2; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static IX Max(IX a, IX b) - { - var v1 = a._value; - var v2 = b._value; - // This works for NA since RawNA - 1 == RawIX.MaxValue. - return v1 - 1 >= v2 - 1 ? v1 : v2; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public IX Max(IX b) - { - var v1 = _value; - var v2 = b._value; - // This works for NA since RawNA - 1 == RawIX.MaxValue. - return v1 - 1 >= v2 - 1 ? v1 : v2; - } - - /// - /// Raise a to the b power. Special cases: - /// * 1^NA => 1 - /// * NA^0 => 1 - /// - public static IX Pow(IX a, IX b) - { - var av = a.RawValue; - var bv = b.RawValue; - - if (av == 1) - return 1; - switch (bv) - { - case 0: - return 1; - case 1: - return av; - case 2: - return a * a; - case RawNA: - return RawNA; - } - if (av == -1) - return (bv & 1) == 0 ? 1 : -1; - if (bv < 0) - return RawNA; - if (av == RawNA) - return RawNA; - - // Since the abs of the base is at least two, the exponent must be less than 63. - if (bv >= 63) - return RawNA; - - bool neg = false; - if (av < 0) - { - av = -av; - neg = (bv & 1) != 0; - } - Contracts.Assert(av >= 2); - - // Since the exponent is at least three, the base must be < 2^21. - Contracts.Assert(bv >= 3); - if (av >= (1L << 21)) - return RawNA; - - long res = 1; - long x = av; - // REVIEW: Is the catch too slow in the overflow case? - try - { - checked - { - for (; ; ) - { - if ((bv & 1) != 0) - res *= x; - bv >>= 1; - if (bv == 0) - break; - x *= x; - } - } - } - catch (OverflowException) - { - return RawNA; - } - Contracts.Assert(res > 0); - - if (neg) - res = -res; - return res; - } - } -} diff --git a/src/Microsoft.ML.Core/Data/DvText.cs b/src/Microsoft.ML.Core/Data/DvText.cs deleted file mode 100644 index 04d3bd8918..0000000000 --- a/src/Microsoft.ML.Core/Data/DvText.cs +++ /dev/null @@ -1,680 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
-// See the LICENSE file in the project root for more information. - -using System; -using System.Collections.Generic; -using System.Text; -using Microsoft.ML.Runtime.Internal.Utilities; - -namespace Microsoft.ML.Runtime.Data -{ - /// - /// A text value. This essentially wraps a portion of a string. This can distinguish between a length zero - /// span of characters and "NA", the latter having a Length of -1. - /// - public struct DvText : IEquatable, IComparable - { - /// - /// The fields/properties , , and are - /// private so client code can't easily "cheat" and look outside the characters. Client - /// code that absolutely needs access to this information can call . - /// - private readonly string _outerBuffer; - private readonly int _ichMin; - - /// - /// For the "NA" value, this is -1; otherwise, it is the number of characters in the text. - /// - public readonly int Length; - - private int IchLim => _ichMin + Length; - - /// - /// Gets a DvText that represents "NA", aka "Missing". - /// - public static DvText NA => new DvText(missing: true); - - /// - /// Gets an empty (zero character) DvText. - /// - public static DvText Empty => default(DvText); - - /// - /// Gets whether this DvText contains any characters. Equivalent to Length > 0. - /// - public bool HasChars => Length > 0; - - /// - /// Gets whether this DvText is empty (distinct from NA). Equivalent to Length == 0. - /// - public bool IsEmpty - { - get - { - Contracts.Assert(Length >= -1); - return Length == 0; - } - } - - /// - /// Gets whether this DvText represents "NA". Equivalent to Length == -1. - /// - public bool IsNA - { - get - { - Contracts.Assert(Length >= -1); - return Length < 0; - } - } - - /// - /// Gets the indicated character in the text. 
- /// - public char this[int ich] - { - get - { - Contracts.CheckParam(0 <= ich & ich < Length, nameof(ich)); - return _outerBuffer[ich + _ichMin]; - } - } - - private DvText(bool missing) - { - _outerBuffer = null; - _ichMin = 0; - Length = missing ? -1 : 0; - } - - /// - /// Constructor using the indicated range of characters in the given string. - /// - public DvText(string text, int ichMin, int ichLim) - { - Contracts.CheckValueOrNull(text); - Contracts.CheckParam(0 <= ichMin & ichMin <= Utils.Size(text), nameof(ichMin)); - Contracts.CheckParam(ichMin <= ichLim & ichLim <= Utils.Size(text), nameof(ichLim)); - Length = ichLim - ichMin; - if (Length == 0) - { - _outerBuffer = null; - _ichMin = 0; - } - else - { - _outerBuffer = text; - _ichMin = ichMin; - } - } - - /// - /// Constructor using the indicated string. - /// - public DvText(string text) - { - Contracts.CheckValueOrNull(text); - Length = Utils.Size(text); - if (Length == 0) - _outerBuffer = null; - else - _outerBuffer = text; - _ichMin = 0; - } - - /// - /// This method retrieves the raw buffer information. The only characters that should be - /// referenced in the returned string are those between the returned min and lim indices. - /// If this is an NA value, the min will be zero and the lim will be -1. For either an - /// empty or NA value, the returned string may be null. - /// - public string GetRawUnderlyingBufferInfo(out int ichMin, out int ichLim) - { - ichMin = _ichMin; - ichLim = ichMin + Length; - return _outerBuffer; - } - - /// - /// This compares the two text values with NA propagation semantics. 
- /// - public static DvBool operator ==(DvText a, DvText b) - { - if (a.IsNA || b.IsNA) - return DvBool.NA; - - if (a.Length != b.Length) - return DvBool.False; - for (int i = 0; i < a.Length; i++) - { - if (a._outerBuffer[a._ichMin + i] != b._outerBuffer[b._ichMin + i]) - return DvBool.False; - } - return DvBool.True; - } - - /// - /// This compares the two text values with NA propagation semantics. - /// - public static DvBool operator !=(DvText a, DvText b) - { - if (a.IsNA || b.IsNA) - return DvBool.NA; - - if (a.Length != b.Length) - return DvBool.True; - for (int i = 0; i < a.Length; i++) - { - if (a._outerBuffer[a._ichMin + i] != b._outerBuffer[b._ichMin + i]) - return DvBool.True; - } - return DvBool.False; - } - - public override int GetHashCode() - { - if (IsNA) - return 0; - return (int)Hash(42); - } - - public override bool Equals(object obj) - { - if (obj is DvText) - return Equals((DvText)obj); - return false; - } - - /// - /// This implements IEquatable's Equals method. Returns true if both are NA. - /// For NA propagating equality comparison, use the == operator. - /// - public bool Equals(DvText b) - { - if (Length != b.Length) - return false; - Contracts.Assert(HasChars == b.HasChars); - for (int i = 0; i < Length; i++) - { - if (_outerBuffer[_ichMin + i] != b._outerBuffer[b._ichMin + i]) - return false; - } - return true; - } - - /// - /// Does not propagate NA values. Returns true if both are NA (same as a.Equals(b)). - /// For NA propagating equality comparison, use the == operator. - /// - public static bool Identical(DvText a, DvText b) - { - if (a.Length != b.Length) - return false; - if (a.HasChars) - { - Contracts.Assert(b.HasChars); - for (int i = 0; i < a.Length; i++) - { - if (a._outerBuffer[a._ichMin + i] != b._outerBuffer[b._ichMin + i]) - return false; - } - } - return true; - } - - /// - /// Compare equality with the given system string value. Returns false if "this" is NA. 
- /// - public bool EqualsStr(string s) - { - Contracts.CheckValueOrNull(s); - - // Note that "NA" doesn't match any string. - if (s == null) - return Length == 0; - - if (s.Length != Length) - return false; - for (int i = 0; i < Length; i++) - { - if (s[i] != _outerBuffer[_ichMin + i]) - return false; - } - return true; - } - - /// - /// For implementation of . Uses code point comparison. - /// Generally, this is not appropriate for sorting for presentation to a user. - /// Sorts NA before everything else. - /// - public int CompareTo(DvText other) - { - if (IsNA) - return other.IsNA ? 0 : -1; - if (other.IsNA) - return +1; - - int len = Math.Min(Length, other.Length); - for (int ich = 0; ich < len; ich++) - { - char ch1 = _outerBuffer[_ichMin + ich]; - char ch2 = other._outerBuffer[other._ichMin + ich]; - if (ch1 != ch2) - return ch1 < ch2 ? -1 : +1; - } - if (len < other.Length) - return -1; - if (len < Length) - return +1; - return 0; - } - - /// - /// Return a DvText consisting of characters from ich to the end of this DvText. - /// - public DvText SubSpan(int ich) - { - Contracts.CheckParam(0 <= ich & ich <= Length, nameof(ich)); - return new DvText(_outerBuffer, ich + _ichMin, IchLim); - } - - /// - /// Return a DvText consisting of the indicated range of characters. - /// - public DvText SubSpan(int ichMin, int ichLim) - { - Contracts.CheckParam(0 <= ichMin & ichMin <= Length, nameof(ichMin)); - Contracts.CheckParam(ichMin <= ichLim & ichLim <= Length, nameof(ichLim)); - return new DvText(_outerBuffer, ichMin + _ichMin, ichLim + _ichMin); - } - - /// - /// Return a non-null string corresponding to the characters in this DvText. - /// Note that an empty string is returned for both Empty and NA. 
- /// - public override string ToString() - { - if (!HasChars) - return ""; - Contracts.AssertNonEmpty(_outerBuffer); - if (_ichMin == 0 && Length == _outerBuffer.Length) - return _outerBuffer; - return _outerBuffer.Substring(_ichMin, Length); - } - - public string ToString(int ichMin) - { - Contracts.CheckParam(0 <= ichMin & ichMin <= Length, nameof(ichMin)); - if (ichMin == Length) - return ""; - ichMin += _ichMin; - if (ichMin == 0 && Length == _outerBuffer.Length) - return _outerBuffer; - return _outerBuffer.Substring(ichMin, IchLim - ichMin); - } - - public IEnumerable Split(char[] separators) - { - Contracts.CheckValueOrNull(separators); - - if (!HasChars) - yield break; - - if (separators == null || separators.Length == 0) - { - yield return this; - yield break; - } - - string text = _outerBuffer; - int ichLim = IchLim; - if (separators.Length == 1) - { - char chSep = separators[0]; - for (int ichCur = _ichMin; ; ) - { - int ichMin = ichCur; - for (; ; ichCur++) - { - Contracts.Assert(ichCur <= ichLim); - if (ichCur >= ichLim) - { - yield return new DvText(text, ichMin, ichCur); - yield break; - } - if (text[ichCur] == chSep) - break; - } - - yield return new DvText(text, ichMin, ichCur); - - // Skip the separator. - ichCur++; - } - } - else - { - for (int ichCur = _ichMin; ; ) - { - int ichMin = ichCur; - for (; ; ichCur++) - { - Contracts.Assert(ichCur <= ichLim); - if (ichCur >= ichLim) - { - yield return new DvText(text, ichMin, ichCur); - yield break; - } - // REVIEW: Can this be faster? - if (ContainsChar(text[ichCur], separators)) - break; - } - - yield return new DvText(text, ichMin, ichCur); - - // Skip the separator. - ichCur++; - } - } - } - - /// - /// Splits this instance on the left-most occurrence of separator and produces the left - /// and right values. If this instance does not contain the separator character, - /// this returns false and sets to this instance and - /// to the default value. 
- /// - public bool SplitOne(char separator, out DvText left, out DvText right) - { - if (!HasChars) - { - left = this; - right = default(DvText); - return false; - } - - string text = _outerBuffer; - int ichMin = _ichMin; - int ichLim = IchLim; - - int ichCur = ichMin; - for (; ; ichCur++) - { - Contracts.Assert(ichMin <= ichCur && ichCur <= ichLim); - if (ichCur >= ichLim) - { - left = this; - right = default(DvText); - return false; - } - if (text[ichCur] == separator) - break; - } - - // Note that we don't use any fields of "this" here in case one - // of the out parameters is the same as "this". - left = new DvText(text, ichMin, ichCur); - right = new DvText(text, ichCur + 1, ichLim); - return true; - } - - /// - /// Splits this instance on the left-most occurrence of an element of separators character array and - /// produces the left and right values. If this instance does not contain any of the - /// characters in separators, thiss return false and initializes to this instance - /// and to the default value. - /// - public bool SplitOne(char[] separators, out DvText left, out DvText right) - { - Contracts.CheckValueOrNull(separators); - - if (!HasChars || separators == null || separators.Length == 0) - { - left = this; - right = default(DvText); - return false; - } - - string text = _outerBuffer; - int ichMin = _ichMin; - int ichLim = IchLim; - - int ichCur = ichMin; - if (separators.Length == 1) - { - // Note: This duplicates code of the other SplitOne, but doing so improves perf because this is - // used so heavily in instances parsing. 
- char chSep = separators[0]; - for (; ; ichCur++) - { - Contracts.Assert(ichMin <= ichCur && ichCur <= ichLim); - if (ichCur >= ichLim) - { - left = this; - right = default(DvText); - return false; - } - if (text[ichCur] == chSep) - break; - } - } - else - { - for (; ; ichCur++) - { - Contracts.Assert(ichMin <= ichCur && ichCur <= ichLim); - if (ichCur >= ichLim) - { - left = this; - right = default(DvText); - return false; - } - // REVIEW: Can this be faster? - if (ContainsChar(text[ichCur], separators)) - break; - } - } - - // Note that we don't use any fields of "this" here in case one - // of the out parameters is the same as "this". - left = new DvText(text, _ichMin, ichCur); - right = new DvText(text, ichCur + 1, ichLim); - return true; - } - - /// - /// Splits this instance on the right-most occurrence of separator and produces the left - /// and right values. If this instance does not contain the separator character, - /// this returns false and sets to this instance and - /// to the default value. - /// - public bool SplitOneRight(char separator, out DvText left, out DvText right) - { - if (!HasChars) - { - left = this; - right = default(DvText); - return false; - } - - string text = _outerBuffer; - int ichMin = _ichMin; - int ichLim = IchLim; - - int ichCur = ichLim; - for (; ; ) - { - Contracts.Assert(ichMin <= ichCur && ichCur <= ichLim); - if (--ichCur < ichMin) - { - left = this; - right = default(DvText); - return false; - } - if (text[ichCur] == separator) - break; - } - - // Note that we don't use any fields of "this" here in case one - // of the out parameters is the same as "this". - left = new DvText(text, ichMin, ichCur); - right = new DvText(text, ichCur + 1, ichLim); - return true; - } - - // REVIEW: Can this be faster? 
- private static bool ContainsChar(char ch, char[] rgch) - { - Contracts.CheckNonEmpty(rgch, nameof(rgch)); - - for (int i = 0; i < rgch.Length; i++) - { - if (rgch[i] == ch) - return true; - } - return false; - } - - /// - /// Returns a text span with leading and trailing spaces trimmed. Note that this - /// will remove only spaces, not any form of whitespace. - /// - public DvText Trim() - { - if (!HasChars) - return this; - int ichMin = _ichMin; - int ichLim = IchLim; - if (_outerBuffer[ichMin] != ' ' && _outerBuffer[ichLim - 1] != ' ') - return this; - - while (ichMin < ichLim && _outerBuffer[ichMin] == ' ') - ichMin++; - while (ichMin < ichLim && _outerBuffer[ichLim - 1] == ' ') - ichLim--; - return new DvText(_outerBuffer, ichMin, ichLim); - } - - /// - /// Returns a text span with leading and trailing whitespace trimmed. - /// - public DvText TrimWhiteSpace() - { - if (!HasChars) - return this; - int ichMin = _ichMin; - int ichLim = IchLim; - if (!char.IsWhiteSpace(_outerBuffer[ichMin]) && !char.IsWhiteSpace(_outerBuffer[ichLim - 1])) - return this; - - while (ichMin < ichLim && char.IsWhiteSpace(_outerBuffer[ichMin])) - ichMin++; - while (ichMin < ichLim && char.IsWhiteSpace(_outerBuffer[ichLim - 1])) - ichLim--; - return new DvText(_outerBuffer, ichMin, ichLim); - } - - /// - /// Returns a text span with trailing whitespace trimmed. - /// - public DvText TrimEndWhiteSpace() - { - if (!HasChars) - return this; - - int ichLim = IchLim; - if (!char.IsWhiteSpace(_outerBuffer[ichLim - 1])) - return this; - - int ichMin = _ichMin; - while (ichMin < ichLim && char.IsWhiteSpace(_outerBuffer[ichLim - 1])) - ichLim--; - - return new DvText(_outerBuffer, ichMin, ichLim); - } - - /// - /// This produces zero for an empty string. 
- /// - public bool TryParse(out Single value) - { - if (IsNA) - { - value = Single.NaN; - return true; - } - var res = DoubleParser.Parse(out value, _outerBuffer, _ichMin, IchLim); - Contracts.Assert(res != DoubleParser.Result.Empty || value == 0); - return res <= DoubleParser.Result.Empty; - } - - /// - /// This produces zero for an empty string. - /// - public bool TryParse(out Double value) - { - if (IsNA) - { - value = Double.NaN; - return true; - } - var res = DoubleParser.Parse(out value, _outerBuffer, _ichMin, IchLim); - Contracts.Assert(res != DoubleParser.Result.Empty || value == 0); - return res <= DoubleParser.Result.Empty; - } - - public uint Hash(uint seed) - { - Contracts.Check(!IsNA); - return Hashing.MurmurHash(seed, _outerBuffer, _ichMin, IchLim); - } - - // REVIEW: Add method to NormStr.Pool that deal with DvText instead of the other way around. - public NormStr AddToPool(NormStr.Pool pool) - { - Contracts.Check(!IsNA); - Contracts.CheckValue(pool, nameof(pool)); - return pool.Add(_outerBuffer, _ichMin, IchLim); - } - - public NormStr FindInPool(NormStr.Pool pool) - { - Contracts.CheckValue(pool, nameof(pool)); - if (IsNA) - return null; - return pool.Get(_outerBuffer, _ichMin, IchLim); - } - - public void AddToStringBuilder(StringBuilder sb) - { - Contracts.CheckValue(sb, nameof(sb)); - if (HasChars) - sb.Append(_outerBuffer, _ichMin, Length); - } - - public void AddLowerCaseToStringBuilder(StringBuilder sb) - { - Contracts.CheckValue(sb, nameof(sb)); - if (HasChars) - { - int min = _ichMin; - int j; - for (j = min; j < IchLim; j++) - { - char ch = CharUtils.ToLowerInvariant(_outerBuffer[j]); - if (ch != _outerBuffer[j]) - { - sb.Append(_outerBuffer, min, j - min).Append(ch); - min = j + 1; - } - } - - Contracts.Assert(j == IchLim); - if (min != j) - sb.Append(_outerBuffer, min, j - min); - } - } - } -} \ No newline at end of file diff --git a/src/Microsoft.ML.Core/Data/MetadataUtils.cs b/src/Microsoft.ML.Core/Data/MetadataUtils.cs index 
f7b91c3715..d952f57782 100644 --- a/src/Microsoft.ML.Core/Data/MetadataUtils.cs +++ b/src/Microsoft.ML.Core/Data/MetadataUtils.cs @@ -42,12 +42,13 @@ public static class Kinds public const string ScoreColumnSetId = "ScoreColumnSetId"; /// - /// Metadata kind that indicates the prediction kind as a string. E.g. "BinaryClassification". The value is typically a DvText. + /// Metadata kind that indicates the prediction kind as a string. E.g. "BinaryClassification". + /// The value is typically a ReadOnlyMemory<char>. /// public const string ScoreColumnKind = "ScoreColumnKind"; /// - /// Metadata kind that indicates the value kind of the score column as a string. E.g. "Score", "PredictedLabel", "Probability". The value is typically a DvText. + /// Metadata kind that indicates the value kind of the score column as a string. E.g. "Score", "PredictedLabel", "Probability". The value is typically a ReadOnlyMemory. /// public const string ScoreValueKind = "ScoreValueKind"; @@ -283,9 +284,9 @@ public static IEnumerable GetColumnSet(this ISchema schema, string metadata var columnType = schema.GetMetadataTypeOrNull(metadataKind, col); if (columnType != null && columnType.IsText) { - DvText val = default(DvText); + ReadOnlyMemory val = default; schema.GetMetadata(metadataKind, col, ref val); - if (val.EqualsStr(value)) + if (ReadOnlyMemoryUtils.EqualsStr(value, val)) yield return col; } } @@ -295,7 +296,7 @@ public static IEnumerable GetColumnSet(this ISchema schema, string metadata /// Returns true if the specified column: /// * is a vector of length N (including 0) /// * has a SlotNames metadata - /// * metadata type is VBuffer<DvText> of length N + /// * metadata type is VBuffer<ReadOnlyMemory<char>> of length N /// public static bool HasSlotNames(this ISchema schema, int col, int vectorSize) { @@ -310,14 +311,14 @@ public static bool HasSlotNames(this ISchema schema, int col, int vectorSize) && type.ItemType.IsText; } - public static void GetSlotNames(RoleMappedSchema schema, 
RoleMappedSchema.ColumnRole role, int vectorSize, ref VBuffer slotNames) + public static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.ColumnRole role, int vectorSize, ref VBuffer> slotNames) { Contracts.CheckValueOrNull(schema); Contracts.CheckParam(vectorSize >= 0, nameof(vectorSize)); IReadOnlyList list; if ((list = schema?.GetColumns(role)) == null || list.Count != 1 || !schema.Schema.HasSlotNames(list[0].Index, vectorSize)) - slotNames = new VBuffer(vectorSize, 0, slotNames.Values, slotNames.Indices); + slotNames = new VBuffer>(vectorSize, 0, slotNames.Values, slotNames.Indices); else schema.Schema.GetMetadata(Kinds.SlotNames, list[0].Index, ref slotNames); } @@ -343,12 +344,12 @@ public static bool HasKeyNames(this ISchema schema, int col, int keyCount) /// The schema to query /// Which column in the schema to query /// True if and only if the column has the metadata - /// set to the scalar value + /// set to the scalar value true public static bool IsNormalized(this ISchema schema, int col) { Contracts.CheckValue(schema, nameof(schema)); - var value = default(DvBool); - return schema.TryGetMetadata(BoolType.Instance, Kinds.IsNormalized, col, ref value) && value.IsTrue; + var value = default(bool); + return schema.TryGetMetadata(BoolType.Instance, Kinds.IsNormalized, col, ref value) && value; } /// @@ -436,9 +437,9 @@ public static bool TryGetCategoricalFeatureIndices(ISchema schema, int colIndex, return isValid; var type = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.CategoricalSlotRanges, colIndex); - if (type?.RawType == typeof(VBuffer)) + if (type?.RawType == typeof(VBuffer)) { - VBuffer catIndices = default(VBuffer); + VBuffer catIndices = default(VBuffer); schema.GetMetadata(MetadataUtils.Kinds.CategoricalSlotRanges, colIndex, ref catIndices); VBufferUtils.Densify(ref catIndices); int columnSlotsCount = schema.GetColumnType(colIndex).AsVector.VectorSizeCore; @@ -448,19 +449,19 @@ public static bool 
TryGetCategoricalFeatureIndices(ISchema schema, int colIndex, isValid = true; for (int i = 0; i < catIndices.Values.Length; i += 2) { - if (catIndices.Values[i].RawValue > catIndices.Values[i + 1].RawValue || - catIndices.Values[i].RawValue <= previousEndIndex || - catIndices.Values[i].RawValue >= columnSlotsCount || - catIndices.Values[i + 1].RawValue >= columnSlotsCount) + if (catIndices.Values[i] > catIndices.Values[i + 1] || + catIndices.Values[i] <= previousEndIndex || + catIndices.Values[i] >= columnSlotsCount || + catIndices.Values[i + 1] >= columnSlotsCount) { isValid = false; break; } - previousEndIndex = catIndices.Values[i + 1].RawValue; + previousEndIndex = catIndices.Values[i + 1]; } if (isValid) - categoricalFeatures = catIndices.Values.Select(val => val.RawValue).ToArray(); + categoricalFeatures = catIndices.Values.Select(val => val).ToArray(); } } diff --git a/src/Microsoft.ML.Core/Data/ReadOnlyMemoryUtils.cs b/src/Microsoft.ML.Core/Data/ReadOnlyMemoryUtils.cs new file mode 100644 index 0000000000..4b207ab507 --- /dev/null +++ b/src/Microsoft.ML.Core/Data/ReadOnlyMemoryUtils.cs @@ -0,0 +1,269 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Runtime.Internal.Utilities; +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; + +namespace Microsoft.ML.Runtime.Data +{ + public static class ReadOnlyMemoryUtils + { + + /// + /// Compare equality with the given system string value. 
+ /// + public static bool EqualsStr(string s, ReadOnlyMemory memory) + { + Contracts.CheckValueOrNull(s); + + if (s == null) + return memory.Length == 0; + + if (s.Length != memory.Length) + return false; + + return memory.Span.SequenceEqual(s.AsSpan()); + } + + public static IEnumerable> Split(ReadOnlyMemory memory, char[] separators) + { + Contracts.CheckValueOrNull(separators); + + if (memory.IsEmpty) + yield break; + + if (separators == null || separators.Length == 0) + { + yield return memory; + yield break; + } + + var span = memory.Span; + if (separators.Length == 1) + { + char chSep = separators[0]; + for (int ichCur = 0; ;) + { + int nextSep = span.IndexOf(chSep); + if (nextSep == -1) + { + yield return memory.Slice(ichCur); + yield break; + } + + yield return memory.Slice(ichCur, nextSep); + + // Skip the separator. + ichCur += nextSep + 1; + span = memory.Slice(ichCur).Span; + } + } + else + { + for (int ichCur = 0; ;) + { + int nextSep = span.IndexOfAny(separators); + if (nextSep == -1) + { + yield return memory.Slice(ichCur); + yield break; + } + + yield return memory.Slice(ichCur, nextSep); + + // Skip the separator. + ichCur += nextSep + 1; + span = memory.Slice(ichCur).Span; + } + } + } + + /// + /// Splits on the left-most occurrence of separator and produces the left + /// and right of values. If does not contain the separator character, + /// this returns false and sets to this instance and + /// to the default of value. 
+ /// + public static bool SplitOne(ReadOnlyMemory memory, char separator, out ReadOnlyMemory left, out ReadOnlyMemory right) + { + if (memory.IsEmpty) + { + left = memory; + right = default; + return false; + } + + int index = memory.Span.IndexOf(separator); + if (index == -1) + { + left = memory; + right = default; + return false; + } + + left = memory.Slice(0, index); + right = memory.Slice(index + 1, memory.Length - index - 1); + return true; + } + + /// + /// Splits on the left-most occurrence of an element of separators character array and + /// produces the left and right of values. If does not contain any of the + /// characters in separators, this return false and initializes to this instance + /// and to the default of value. + /// + public static bool SplitOne(ReadOnlyMemory memory, char[] separators, out ReadOnlyMemory left, out ReadOnlyMemory right) + { + Contracts.CheckValueOrNull(separators); + + if (memory.IsEmpty || separators == null || separators.Length == 0) + { + left = memory; + right = default; + return false; + } + + int index; + if (separators.Length == 1) + index = memory.Span.IndexOf(separators[0]); + else + index = memory.Span.IndexOfAny(separators); + + if (index == -1) + { + left = memory; + right = default; + return false; + } + + left = memory.Slice(0, index); + right = memory.Slice(index + 1, memory.Length - index - 1); + return true; + } + + /// + /// Returns a of with leading and trailing spaces trimmed. Note that this + /// will remove only spaces, not any form of whitespace. 
+ /// + public static ReadOnlyMemory TrimSpaces(ReadOnlyMemory memory) + { + if (memory.IsEmpty) + return memory; + + int ichLim = memory.Length; + int ichMin = 0; + var span = memory.Span; + if (span[ichMin] != ' ' && span[ichLim - 1] != ' ') + return memory; + + while (ichMin < ichLim && span[ichMin] == ' ') + ichMin++; + while (ichMin < ichLim && span[ichLim - 1] == ' ') + ichLim--; + return memory.Slice(ichMin, ichLim - ichMin); + } + + /// + /// Returns a of with leading and trailing whitespace trimmed. + /// + public static ReadOnlyMemory TrimWhiteSpace(ReadOnlyMemory memory) + { + if (memory.IsEmpty) + return memory; + + int ichMin = 0; + int ichLim = memory.Length; + var span = memory.Span; + if (!char.IsWhiteSpace(span[ichMin]) && !char.IsWhiteSpace(span[ichLim - 1])) + return memory; + + while (ichMin < ichLim && char.IsWhiteSpace(span[ichMin])) + ichMin++; + while (ichMin < ichLim && char.IsWhiteSpace(span[ichLim - 1])) + ichLim--; + + return memory.Slice(ichMin, ichLim - ichMin); + } + + /// + /// Returns a of with trailing whitespace trimmed. 
+ /// + public static ReadOnlyMemory TrimEndWhiteSpace(ReadOnlyMemory memory) + { + if (memory.IsEmpty) + return memory; + + int ichLim = memory.Length; + var span = memory.Span; + if (!char.IsWhiteSpace(span[ichLim - 1])) + return memory; + + while (0 < ichLim && char.IsWhiteSpace(span[ichLim - 1])) + ichLim--; + + return memory.Slice(0, ichLim); + } + + public static NormStr AddToPool(ReadOnlyMemory memory, NormStr.Pool pool) + { + Contracts.CheckValue(pool, nameof(pool)); + return pool.Add(memory); + } + + public static NormStr FindInPool(ReadOnlyMemory memory, NormStr.Pool pool) + { + Contracts.CheckValue(pool, nameof(pool)); + return pool.Get(memory); + } + + public static void AddLowerCaseToStringBuilder(ReadOnlySpan span, StringBuilder sb) + { + Contracts.CheckValue(sb, nameof(sb)); + + if (!span.IsEmpty) + { + int min = 0; + int j; + for (j = min; j < span.Length; j++) + { + char ch = CharUtils.ToLowerInvariant(span[j]); + if (ch != span[j]) + { + sb.AppendSpan(span.Slice(min, j - min)).Append(ch); + min = j + 1; + } + } + + Contracts.Assert(j == span.Length); + if (min != j) + sb.AppendSpan(span.Slice(min, j - min)); + } + } + + public static StringBuilder AppendMemory(this StringBuilder sb, ReadOnlyMemory memory) + { + Contracts.CheckValue(sb, nameof(sb)); + if (!memory.IsEmpty) + sb.AppendSpan(memory.Span); + + return sb; + } + + public static StringBuilder AppendSpan(this StringBuilder sb, ReadOnlySpan span) + { + unsafe + { + fixed (char* valueChars = &MemoryMarshal.GetReference(span)) + { + sb.Append(valueChars, span.Length); + } + } + + return sb; + } + } +} diff --git a/src/Microsoft.ML.Core/Data/TypeUtils.cs b/src/Microsoft.ML.Core/Data/TypeUtils.cs index 30a9e4008b..c5d92f1ee7 100644 --- a/src/Microsoft.ML.Core/Data/TypeUtils.cs +++ b/src/Microsoft.ML.Core/Data/TypeUtils.cs @@ -2,102 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
-using System; -using System.Runtime.CompilerServices; - namespace Microsoft.ML.Runtime.Data { - using R4 = Single; - using R8 = Double; - using BL = DvBool; - using TX = DvText; - public delegate bool RefPredicate(ref T value); - - /// - /// Utilities for IDV standard types, including proper NA semantics. - /// - public static class TypeUtils - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsNA(this R4 src) { return R4.IsNaN(src); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsNA(this R8 src) { return R8.IsNaN(src); } - - #region R4 - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL Eq(this R4 a, R4 b) - { - return a == b ? BL.True : a.IsNA() || b.IsNA() ? BL.NA : BL.False; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL Ne(this R4 a, R4 b) - { - return a != b ? a.IsNA() || b.IsNA() ? BL.NA : BL.True : BL.False; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL Lt(this R4 a, R4 b) - { - return a < b ? BL.True : a >= b ? BL.False : BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL Le(this R4 a, R4 b) - { - return a <= b ? BL.True : a > b ? BL.False : BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL Ge(this R4 a, R4 b) - { - return a >= b ? BL.True : a < b ? BL.False : BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL Gt(this R4 a, R4 b) - { - return a > b ? BL.True : a <= b ? BL.False : BL.NA; - } - #endregion R4 - - #region R8 - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL Eq(this R8 a, R8 b) - { - return a == b ? BL.True : a.IsNA() || b.IsNA() ? BL.NA : BL.False; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL Ne(this R8 a, R8 b) - { - return a != b ? a.IsNA() || b.IsNA() ? 
BL.NA : BL.True : BL.False; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL Lt(this R8 a, R8 b) - { - return a < b ? BL.True : a >= b ? BL.False : BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL Le(this R8 a, R8 b) - { - return a <= b ? BL.True : a > b ? BL.False : BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL Ge(this R8 a, R8 b) - { - return a >= b ? BL.True : a < b ? BL.False : BL.NA; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static BL Gt(this R8 a, R8 b) - { - return a > b ? BL.True : a <= b ? BL.False : BL.NA; - } - #endregion R8 - } } diff --git a/src/Microsoft.ML.Core/Microsoft.ML.Core.csproj b/src/Microsoft.ML.Core/Microsoft.ML.Core.csproj index c7bbd498d3..a326e4af34 100644 --- a/src/Microsoft.ML.Core/Microsoft.ML.Core.csproj +++ b/src/Microsoft.ML.Core/Microsoft.ML.Core.csproj @@ -11,4 +11,8 @@ + + + + diff --git a/src/Microsoft.ML.Core/Utilities/DoubleParser.cs b/src/Microsoft.ML.Core/Utilities/DoubleParser.cs index 9ee245443b..a1a82d5218 100644 --- a/src/Microsoft.ML.Core/Utilities/DoubleParser.cs +++ b/src/Microsoft.ML.Core/Utilities/DoubleParser.cs @@ -70,25 +70,44 @@ public enum Result Error = 3 } - public static Result Parse(out Single value, string s, int ichMin, int ichLim) + /// + /// This produces zero for an empty string. + /// + public static bool TryParse(ReadOnlySpan span, out Single value) + { + var res = Parse(span, out value); + Contracts.Assert(res != Result.Empty || value == 0); + return res <= Result.Empty; + } + + /// + /// This produces zero for an empty string. 
+ /// + public static bool TryParse(ReadOnlySpan span, out Double value) { - Contracts.Assert(0 <= ichMin && ichMin <= ichLim && ichLim <= Utils.Size(s)); + var res = Parse(span, out value); + Contracts.Assert(res != Result.Empty || value == 0); + return res <= Result.Empty; + } - for (; ; ichMin++) + public static Result Parse(ReadOnlySpan span, out Single value) + { + int ich = 0; + for (; ; ich++) { - if (ichMin >= ichLim) + if (ich >= span.Length) { value = 0; return Result.Empty; } - if (!char.IsWhiteSpace(s[ichMin])) + if (!char.IsWhiteSpace(span[ich])) break; } // Handle the common case of a single digit or ? - if (ichLim - ichMin == 1) + if (span.Length - ich == 1) { - char ch = s[ichMin]; + char ch = span[ich]; if (ch >= '0' && ch <= '9') { value = ch - '0'; @@ -102,16 +121,16 @@ public static Result Parse(out Single value, string s, int ichMin, int ichLim) } int ichEnd; - if (!DoubleParser.TryParse(out value, s, ichMin, ichLim, out ichEnd)) + if (!DoubleParser.TryParse(span.Slice(ich, span.Length - ich), out value, out ichEnd)) { value = default(Single); return Result.Error; } // Make sure everything was consumed. - while (ichEnd < ichLim) + while (ichEnd < span.Length) { - if (!char.IsWhiteSpace(s[ichEnd])) + if (!char.IsWhiteSpace(span[ichEnd])) return Result.Extra; ichEnd++; } @@ -119,25 +138,24 @@ public static Result Parse(out Single value, string s, int ichMin, int ichLim) return Result.Good; } - public static Result Parse(out Double value, string s, int ichMin, int ichLim) + public static Result Parse(ReadOnlySpan span, out Double value) { - Contracts.Assert(0 <= ichMin && ichMin <= ichLim && ichLim <= Utils.Size(s)); - - for (; ; ichMin++) + int ich = 0; + for (; ; ich++) { - if (ichMin >= ichLim) + if (ich >= span.Length) { value = 0; return Result.Empty; } - if (!char.IsWhiteSpace(s[ichMin])) + if (!char.IsWhiteSpace(span[ich])) break; } // Handle the common case of a single digit or ? 
- if (ichLim - ichMin == 1) + if (span.Length - ich == 1) { - char ch = s[ichMin]; + char ch = span[ich]; if (ch >= '0' && ch <= '9') { value = ch - '0'; @@ -151,16 +169,16 @@ public static Result Parse(out Double value, string s, int ichMin, int ichLim) } int ichEnd; - if (!DoubleParser.TryParse(out value, s, ichMin, ichLim, out ichEnd)) + if (!DoubleParser.TryParse(span.Slice(ich, span.Length - ich), out value, out ichEnd)) { value = default(Double); return Result.Error; } // Make sure everything was consumed. - while (ichEnd < ichLim) + while (ichEnd < span.Length) { - if (!char.IsWhiteSpace(s[ichEnd])) + if (!char.IsWhiteSpace(span[ichEnd])) return Result.Extra; ichEnd++; } @@ -168,15 +186,15 @@ public static Result Parse(out Double value, string s, int ichMin, int ichLim) return Result.Good; } - public static bool TryParse(out Single value, string s, int ichMin, int ichLim, out int ichEnd) + public static bool TryParse(ReadOnlySpan span, out Single value, out int ichEnd) { bool neg = false; ulong num = 0; long exp = 0; - ichEnd = ichMin; - if (!TryParseCore(s, ref ichEnd, ichLim, ref neg, ref num, ref exp)) - return TryParseSpecial(out value, s, ref ichEnd, ichLim); + ichEnd = 0; + if (!TryParseCore(span, ref ichEnd, ref neg, ref num, ref exp)) + return TryParseSpecial(span, ref ichEnd, out value); if (num == 0) { @@ -231,7 +249,7 @@ public static bool TryParse(out Single value, string s, int ichMin, int ichLim, #if COMPARE_BCL if (!_failed) { - string str = s.Substring(ichMin, ichEnd - ichMin); + string str = span.ToString(); Single x; if (!Single.TryParse(str, out x)) { @@ -257,15 +275,15 @@ public static bool TryParse(out Single value, string s, int ichMin, int ichLim, return true; } - public static bool TryParse(out Double value, string s, int ichMin, int ichLim, out int ichEnd) + public static bool TryParse(ReadOnlySpan span, out Double value, out int ichEnd) { bool neg = false; ulong num = 0; long exp = 0; - ichEnd = ichMin; - if (!TryParseCore(s, ref 
ichEnd, ichLim, ref neg, ref num, ref exp)) - return TryParseSpecial(out value, s, ref ichEnd, ichLim); + ichEnd = 0; + if (!TryParseCore(span, ref ichEnd, ref neg, ref num, ref exp)) + return TryParseSpecial(span, ref ichEnd, out value); if (num == 0) { @@ -413,7 +431,7 @@ public static bool TryParse(out Double value, string s, int ichMin, int ichLim, value = -value; #if COMPARE_BCL - string str = s.Substring(ichMin, ichEnd - ichMin); + string str = span.ToString(); Double x; if (!Double.TryParse(str, out x)) { @@ -440,19 +458,19 @@ public static bool TryParse(out Double value, string s, int ichMin, int ichLim, return true; } - private static bool TryParseSpecial(out Double value, string s, ref int ich, int ichLim) + private static bool TryParseSpecial(ReadOnlySpan span, ref int ich, out Double value) { Single tmp; - bool res = TryParseSpecial(out tmp, s, ref ich, ichLim); + bool res = TryParseSpecial(span, ref ich, out tmp); value = tmp; return res; } - private static bool TryParseSpecial(out Single value, string s, ref int ich, int ichLim) + private static bool TryParseSpecial(ReadOnlySpan span, ref int ich, out Single value) { - if (ich < ichLim) + if (ich < span.Length) { - switch (s[ich]) + switch (span[ich]) { case '?': // We also interpret ? to mean NaN. 
@@ -461,7 +479,7 @@ private static bool TryParseSpecial(out Single value, string s, ref int ich, int return true; case 'N': - if (ich + 3 <= ichLim && s[ich + 1] == 'a' && s[ich + 2] == 'N') + if (ich + 3 <= span.Length && span[ich + 1] == 'a' && span[ich + 2] == 'N') { value = Single.NaN; ich += 3; @@ -470,7 +488,7 @@ private static bool TryParseSpecial(out Single value, string s, ref int ich, int break; case 'I': - if (ich + 8 <= ichLim && s[ich + 1] == 'n' && s[ich + 2] == 'f' && s[ich + 3] == 'i' && s[ich + 4] == 'n' && s[ich + 5] == 'i' && s[ich + 6] == 't' && s[ich + 7] == 'y') + if (ich + 8 <= span.Length && span[ich + 1] == 'n' && span[ich + 2] == 'f' && span[ich + 3] == 'i' && span[ich + 4] == 'n' && span[ich + 5] == 'i' && span[ich + 6] == 't' && span[ich + 7] == 'y') { value = Single.PositiveInfinity; ich += 8; @@ -479,14 +497,14 @@ private static bool TryParseSpecial(out Single value, string s, ref int ich, int break; case '-': - if (ich + 2 <= ichLim && s[ich + 1] == InfinitySymbol) + if (ich + 2 <= span.Length && span[ich + 1] == InfinitySymbol) { value = Single.NegativeInfinity; ich += 2; return true; } - if (ich + 9 <= ichLim && s[ich + 1] == 'I' && s[ich + 2] == 'n' && s[ich + 3] == 'f' && s[ich + 4] == 'i' && s[ich + 5] == 'n' && s[ich + 6] == 'i' && s[ich + 7] == 't' && s[ich + 8] == 'y') + if (ich + 9 <= span.Length && span[ich + 1] == 'I' && span[ich + 2] == 'n' && span[ich + 3] == 'f' && span[ich + 4] == 'i' && span[ich + 5] == 'n' && span[ich + 6] == 'i' && span[ich + 7] == 't' && span[ich + 8] == 'y') { value = Single.NegativeInfinity; ich += 9; @@ -505,15 +523,14 @@ private static bool TryParseSpecial(out Single value, string s, ref int ich, int return false; } - private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg, ref ulong num, ref long exp) + private static bool TryParseCore(ReadOnlySpan span, ref int ich, ref bool neg, ref ulong num, ref long exp) { - Contracts.AssertValue(s); - Contracts.Assert(0 <= ich & 
ich <= ichLim & ichLim <= s.Length); + Contracts.Assert(0 <= ich & ich <= span.Length); Contracts.Assert(!neg); Contracts.Assert(num == 0); Contracts.Assert(exp == 0); - if (ich >= ichLim) + if (ich >= span.Length) return false; // If num gets bigger than this, we don't process additional digits. @@ -524,19 +541,19 @@ private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg // Get started: handle sign int i = ich; - switch (s[i]) + switch (span[i]) { default: return false; case '-': - if (++i >= ichLim) + if (++i >= span.Length) return false; neg = true; break; case '+': - if (++i >= ichLim) + if (++i >= span.Length) return false; break; @@ -561,8 +578,8 @@ private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg uint d; for (; ; ) { - Contracts.Assert(i < ichLim); - if ((d = (uint)s[i] - '0') > 9) + Contracts.Assert(i < span.Length); + if ((d = (uint)span[i] - '0') > 9) break; digits = true; @@ -571,33 +588,33 @@ private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg else exp++; - if (++i >= ichLim) + if (++i >= span.Length) { ich = i; return true; } } - Contracts.Assert(i < ichLim); + Contracts.Assert(i < span.Length); - if (s[i] != '.') + if (span[i] != '.') goto LAfterDigits; LPoint: - Contracts.Assert(i < ichLim); - Contracts.Assert(s[i] == '.'); + Contracts.Assert(i < span.Length); + Contracts.Assert(span[i] == '.'); // Get the digits after '.' 
for (; ; ) { - if (++i >= ichLim) + if (++i >= span.Length) { if (digits) ich = i; return digits; } - Contracts.Assert(i < ichLim); - if ((d = (uint)s[i] - '0') > 9) + Contracts.Assert(i < span.Length); + if ((d = (uint)span[i] - '0') > 9) break; digits = true; @@ -609,7 +626,7 @@ private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg } LAfterDigits: - Contracts.Assert(i < ichLim); + Contracts.Assert(i < span.Length); if (!digits) return false; @@ -617,30 +634,30 @@ private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg ich = i; // Check for an exponent. - switch (s[i]) + switch (span[i]) { default: return true; case 'e': case 'E': - if (++i >= ichLim) + if (++i >= span.Length) return true; break; } // Handle the exponent sign. bool expNeg = false; - Contracts.Assert(i < ichLim); - switch (s[i]) + Contracts.Assert(i < span.Length); + switch (span[i]) { case '-': - if (++i >= ichLim) + if (++i >= span.Length) return true; expNeg = true; break; case '+': - if (++i >= ichLim) + if (++i >= span.Length) return true; break; } @@ -656,14 +673,14 @@ private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg long e = 0; for (; ; ) { - Contracts.Assert(i < ichLim); - if ((d = (uint)s[i] - '0') > 9) + Contracts.Assert(i < span.Length); + if ((d = (uint)span[i] - '0') > 9) break; digits = true; if (e < eMax) e = 10 * e + (int)d; - if (++i >= ichLim) + if (++i >= span.Length) break; } diff --git a/src/Microsoft.ML.Core/Utilities/Hashing.cs b/src/Microsoft.ML.Core/Utilities/Hashing.cs index 5812937d72..a15677451b 100644 --- a/src/Microsoft.ML.Core/Utilities/Hashing.cs +++ b/src/Microsoft.ML.Core/Utilities/Hashing.cs @@ -11,6 +11,8 @@ namespace Microsoft.ML.Runtime.Internal.Utilities { public static class Hashing { + private const uint _defaultSeed = (5381 << 16) + 5381; + public static uint CombineHash(uint u1, uint u2) { return ((u1 << 7) | (u1 >> 25)) ^ u2; @@ -62,24 +64,10 @@ public static int 
HashInt(int n) } /// - /// Hash the characters in a string. This MUST produce the same result as the other - /// overloads (with equivalent characters). - /// - public static uint HashString(string str) - { - Contracts.AssertValue(str); - return MurmurHash((5381 << 16) + 5381, str, 0, str.Length); - } - - /// - /// Hash the characters in a sub-string. This MUST produce the same result - /// as HashString(str.SubString(ichMin, ichLim - ichMin)). + /// Hash the characters in a of . + /// This MUST produce the same result as the other overloads (with equivalent characters). /// - public static uint HashString(string str, int ichMin, int ichLim) - { - Contracts.Assert(0 <= ichMin & ichMin <= ichLim & ichLim <= Utils.Size(str)); - return MurmurHash((5381 << 16) + 5381, str, ichMin, ichLim); - } + public static uint HashString(ReadOnlySpan str) => MurmurHash(_defaultSeed, str); /// /// Hash the characters in a string builder. This MUST produce the same result @@ -88,12 +76,12 @@ public static uint HashString(string str, int ichMin, int ichLim) public static uint HashString(StringBuilder sb) { Contracts.AssertValue(sb); - return MurmurHash((5381 << 16) + 5381, sb, 0, sb.Length); + return MurmurHash(_defaultSeed, sb, 0, sb.Length); } public static uint HashSequence(uint[] sequence, int min, int lim) { - return MurmurHash((5381 << 16) + 5381, sequence, min, lim); + return MurmurHash(_defaultSeed, sequence, min, lim); } /// @@ -125,23 +113,21 @@ public static uint MurmurRound(uint hash, uint chunk) /// * 0x0800 to 0xFFFF : 1110xxxx 10xxxxxx 10xxxxxx /// NOTE: This MUST match the StringBuilder version below. /// - public static uint MurmurHash(uint hash, string data, int ichMin, int ichLim, bool toUpper = false) + public static uint MurmurHash(uint hash, ReadOnlySpan span, bool toUpper = false) { - Contracts.Assert(0 <= ichMin & ichMin <= ichLim & ichLim <= Utils.Size(data)); - // Byte length (in pseudo UTF-8 form). int len = 0; // Current bits, value and count. 
ulong cur = 0; int bits = 0; - for (int ich = ichMin; ich < ichLim; ich++) + for (int ich = 0; ich < span.Length; ich++) { Contracts.Assert((bits & 0x7) == 0); Contracts.Assert((uint)bits <= 24); Contracts.Assert(cur <= 0x00FFFFFF); - uint ch = toUpper ? char.ToUpperInvariant(data[ich]) : data[ich]; + uint ch = toUpper ? char.ToUpperInvariant(span[ich]) : span[ich]; if (ch <= 0x007F) { cur |= ch << bits; @@ -256,7 +242,7 @@ public static uint MurmurHash(uint hash, StringBuilder data, int ichMin, int ich // Final mixing ritual for the hash. hash = MixHash(hash); - Contracts.Assert(hash == MurmurHash(seed, data.ToString(), 0, data.Length)); + Contracts.Assert(hash == MurmurHash(seed, data.ToString().AsSpan())); return hash; } diff --git a/src/Microsoft.ML.Core/Utilities/NormStr.cs b/src/Microsoft.ML.Core/Utilities/NormStr.cs index 50b72196ed..fea018ac58 100644 --- a/src/Microsoft.ML.Core/Utilities/NormStr.cs +++ b/src/Microsoft.ML.Core/Utilities/NormStr.cs @@ -8,6 +8,7 @@ using System.Linq; using System.Threading; using System.Text; +using Microsoft.ML.Runtime.Data; namespace Microsoft.ML.Runtime.Internal.Utilities { @@ -18,37 +19,26 @@ namespace Microsoft.ML.Runtime.Internal.Utilities /// public sealed class NormStr { - public readonly string Value; + public readonly ReadOnlyMemory Value; public readonly int Id; private readonly uint _hash; /// /// NormStr's can only be created by the Pool. /// - private NormStr(string str, int id, uint hash) + private NormStr(ReadOnlyMemory str, int id, uint hash) { - Contracts.AssertValue(str); - Contracts.Assert(id >= 0 || id == -1 && str == ""); + Contracts.Assert(id >= 0 || id == -1 && str.IsEmpty); Value = str; Id = id; _hash = hash; } - public override string ToString() - { - return Value; - } - public override int GetHashCode() { return (int)_hash; } - public static implicit operator string(NormStr nstr) - { - return nstr.Value; - } - public sealed class Pool : IEnumerable { private int _mask; // Number of buckets minus 1. 
The number of buckets must be a power of two. @@ -107,7 +97,8 @@ public NormStr Get(string str, bool add = false) if (str == null) str = ""; - uint hash = Hashing.HashString(str); + var strSpan = str.AsSpan(); + uint hash = Hashing.HashString(strSpan); int ins = GetIns(hash); while (ins >= 0) { @@ -115,75 +106,50 @@ public NormStr Get(string str, bool add = false) if ((int)Utils.GetLo(meta) == str.Length) { var ns = GetNs(ins); - if (ns.Value == str) + if (strSpan.SequenceEqual(ns.Value.Span)) return ns; } ins = (int)Utils.GetHi(meta); } Contracts.Assert(ins == -1); - return add ? AddCore(str, hash) : null; + return add ? AddCore(str.AsMemory(), hash) : null; } - /// - /// Make sure the given string has an equivalent NormStr in the pool and return it. - /// - public NormStr Add(string str) - { - return Get(str, true); - } - - /// - /// Determine if the given sub-string has an equivalent NormStr in the pool. - /// - public NormStr Get(string str, int ichMin, int ichLim, bool add = false) + public NormStr Get(ReadOnlyMemory str, bool add = false) { AssertValid(); - Contracts.Assert(0 <= ichMin & ichMin <= ichLim & ichLim <= Utils.Size(str)); - if (str == null) - return Get("", add); - - if (ichMin == 0 && ichLim == str.Length) - return Get(str, add); - - uint hash = Hashing.HashString(str, ichMin, ichLim); + var span = str.Span; + uint hash = Hashing.HashString(span); int ins = GetIns(hash); - if (ins >= 0) + while (ins >= 0) { - int cch = ichLim - ichMin; - var rgmeta = _rgmeta; - for (; ; ) + ulong meta = _rgmeta[ins]; + if ((int)Utils.GetLo(meta) == str.Length) { - ulong meta = rgmeta[ins]; - if ((int)Utils.GetLo(meta) == cch) - { - var ns = GetNs(ins); - var value = ns.Value; - for (int ich = 0; ; ich++) - { - if (ich == cch) - return ns; - if (value[ich] != str[ich + ichMin]) - break; - } - } - ins = (int)Utils.GetHi(meta); - if (ins < 0) - break; + var ns = GetNs(ins); + if (ns.Value.Span.SequenceEqual(span)) + return ns; } + ins = (int)Utils.GetHi(meta); } 
Contracts.Assert(ins == -1); - return add ? AddCore(str.Substring(ichMin, ichLim - ichMin), hash) : null; + return add ? AddCore(str, hash) : null; } /// - /// Make sure the given sub-string has an equivalent NormStr in the pool and return it. + /// Make sure the given string has an equivalent NormStr in the pool and return it. /// - public NormStr Add(string str, int ichMin, int ichLim) + public NormStr Add(string str) { - return Get(str, ichMin, ichLim, true); + return Get(str, true); + } + + public NormStr Add(ReadOnlyMemory str) + { + return Get(str, true); } /// @@ -212,7 +178,7 @@ public NormStr Get(StringBuilder sb, bool add = false) { if (ich == cch) return ns; - if (value[ich] != sb[ich]) + if (value.Span[ich] != sb[ich]) break; } } @@ -220,7 +186,7 @@ public NormStr Get(StringBuilder sb, bool add = false) } Contracts.Assert(ins == -1); - return add ? AddCore(sb.ToString(), hash) : null; + return add ? AddCore(sb.ToString().AsMemory(), hash) : null; } /// @@ -234,11 +200,10 @@ public NormStr Add(StringBuilder sb) /// /// Adds the item. Does NOT check for whether the item is already present. /// - private NormStr AddCore(string str, uint hash) + private NormStr AddCore(ReadOnlyMemory str, uint hash) { - Contracts.AssertValue(str); Contracts.Assert(str.Length >= 0); - Contracts.Assert(Hashing.HashString(str) == hash); + Contracts.Assert(Hashing.HashString(str.Span) == hash); if (_rgns == null) { diff --git a/src/Microsoft.ML.Core/Utilities/Stream.cs b/src/Microsoft.ML.Core/Utilities/Stream.cs index 5e2974e2a5..7fbf0148b9 100644 --- a/src/Microsoft.ML.Core/Utilities/Stream.cs +++ b/src/Microsoft.ML.Core/Utilities/Stream.cs @@ -2,8 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
-using Float = System.Single; - using System; using System.Collections; using System.Collections.Generic; @@ -178,7 +176,7 @@ public static void WriteBytesNoCount(this BinaryWriter writer, byte[] values, in /// /// Writes a length prefixed array of Floats. /// - public static void WriteFloatArray(this BinaryWriter writer, Float[] values) + public static void WriteFloatArray(this BinaryWriter writer, float[] values) { Contracts.AssertValue(writer); Contracts.AssertValueOrNull(values); @@ -197,7 +195,7 @@ public static void WriteFloatArray(this BinaryWriter writer, Float[] values) /// /// Writes a length prefixed array of Floats. /// - public static void WriteFloatArray(this BinaryWriter writer, Float[] values, int count) + public static void WriteFloatArray(this BinaryWriter writer, float[] values, int count) { Contracts.AssertValue(writer); Contracts.AssertValueOrNull(values); @@ -211,7 +209,7 @@ public static void WriteFloatArray(this BinaryWriter writer, Float[] values, int /// /// Writes a specified number of floats starting at the specified index from an array. /// - public static void WriteFloatArray(this BinaryWriter writer, Float[] values, int start, int count) + public static void WriteFloatArray(this BinaryWriter writer, float[] values, int start, int count) { Contracts.AssertValue(writer); Contracts.AssertValue(values); @@ -225,7 +223,7 @@ public static void WriteFloatArray(this BinaryWriter writer, Float[] values, int /// /// Writes a length prefixed array of Floats. /// - public static void WriteFloatArray(this BinaryWriter writer, IEnumerable values, int count) + public static void WriteFloatArray(this BinaryWriter writer, IEnumerable values, int count) { Contracts.AssertValue(writer); Contracts.AssertValue(values); @@ -244,7 +242,7 @@ public static void WriteFloatArray(this BinaryWriter writer, IEnumerable /// /// Writes an array of Floats without the length prefix. 
/// - public static void WriteFloatsNoCount(this BinaryWriter writer, Float[] values, int count) + public static void WriteFloatsNoCount(this BinaryWriter writer, float[] values, int count) { Contracts.AssertValue(writer); Contracts.AssertValueOrNull(values); @@ -257,7 +255,7 @@ public static void WriteFloatsNoCount(this BinaryWriter writer, Float[] values, /// /// Writes a length prefixed array of singles. /// - public static void WriteSingleArray(this BinaryWriter writer, Single[] values) + public static void WriteSingleArray(this BinaryWriter writer, float[] values) { Contracts.AssertValue(writer); Contracts.AssertValueOrNull(values); @@ -276,7 +274,7 @@ public static void WriteSingleArray(this BinaryWriter writer, Single[] values) /// /// Writes a length prefixed array of singles. /// - public static void WriteSingleArray(this BinaryWriter writer, Single[] values, int count) + public static void WriteSingleArray(this BinaryWriter writer, float[] values, int count) { Contracts.AssertValue(writer); Contracts.AssertValueOrNull(values); @@ -290,7 +288,7 @@ public static void WriteSingleArray(this BinaryWriter writer, Single[] values, i /// /// Writes an array of singles without the length prefix. /// - public static void WriteSinglesNoCount(this BinaryWriter writer, Single[] values, int count) + public static void WriteSinglesNoCount(this BinaryWriter writer, float[] values, int count) { Contracts.AssertValue(writer); Contracts.AssertValueOrNull(values); @@ -303,7 +301,7 @@ public static void WriteSinglesNoCount(this BinaryWriter writer, Single[] values /// /// Writes a length prefixed array of doubles. 
/// - public static void WriteDoubleArray(this BinaryWriter writer, Double[] values) + public static void WriteDoubleArray(this BinaryWriter writer, double[] values) { Contracts.AssertValue(writer); Contracts.AssertValueOrNull(values); @@ -315,14 +313,14 @@ public static void WriteDoubleArray(this BinaryWriter writer, Double[] values) } writer.Write(values.Length); - foreach (Double val in values) + foreach (double val in values) writer.Write(val); } /// /// Writes a length prefixed array of doubles. /// - public static void WriteDoubleArray(this BinaryWriter writer, Double[] values, int count) + public static void WriteDoubleArray(this BinaryWriter writer, double[] values, int count) { Contracts.AssertValue(writer); Contracts.AssertValueOrNull(values); @@ -336,7 +334,7 @@ public static void WriteDoubleArray(this BinaryWriter writer, Double[] values, i /// /// Writes an array of doubles without the length prefix. /// - public static void WriteDoublesNoCount(this BinaryWriter writer, Double[] values, int count) + public static void WriteDoublesNoCount(this BinaryWriter writer, double[] values, int count) { Contracts.AssertValue(writer); Contracts.AssertValueOrNull(values); @@ -427,7 +425,7 @@ public static void WriteBitArray(this BinaryWriter writer, BitArray arr) } } - public static long WriteSByteStream(this BinaryWriter writer, IEnumerable e) + public static long WriteSByteStream(this BinaryWriter writer, IEnumerable e) { long c = 0; foreach (var v in e) @@ -438,7 +436,7 @@ public static long WriteSByteStream(this BinaryWriter writer, IEnumerable return c; } - public static long WriteByteStream(this BinaryWriter writer, IEnumerable e) + public static long WriteByteStream(this BinaryWriter writer, IEnumerable e) { long c = 0; foreach (var v in e) @@ -449,7 +447,7 @@ public static long WriteByteStream(this BinaryWriter writer, IEnumerable e return c; } - public static long WriteIntStream(this BinaryWriter writer, IEnumerable e) + public static long 
WriteIntStream(this BinaryWriter writer, IEnumerable e) { long c = 0; foreach (var v in e) @@ -460,7 +458,7 @@ public static long WriteIntStream(this BinaryWriter writer, IEnumerable e return c; } - public static long WriteUIntStream(this BinaryWriter writer, IEnumerable e) + public static long WriteUIntStream(this BinaryWriter writer, IEnumerable e) { long c = 0; foreach (var v in e) @@ -471,7 +469,7 @@ public static long WriteUIntStream(this BinaryWriter writer, IEnumerable return c; } - public static long WriteShortStream(this BinaryWriter writer, IEnumerable e) + public static long WriteShortStream(this BinaryWriter writer, IEnumerable e) { long c = 0; foreach (var v in e) @@ -482,7 +480,7 @@ public static long WriteShortStream(this BinaryWriter writer, IEnumerable return c; } - public static long WriteUShortStream(this BinaryWriter writer, IEnumerable e) + public static long WriteUShortStream(this BinaryWriter writer, IEnumerable e) { long c = 0; foreach (var v in e) @@ -493,7 +491,7 @@ public static long WriteUShortStream(this BinaryWriter writer, IEnumerable e) + public static long WriteLongStream(this BinaryWriter writer, IEnumerable e) { long c = 0; foreach (var v in e) @@ -504,7 +502,7 @@ public static long WriteLongStream(this BinaryWriter writer, IEnumerable return c; } - public static long WriteULongStream(this BinaryWriter writer, IEnumerable e) + public static long WriteULongStream(this BinaryWriter writer, IEnumerable e) { long c = 0; foreach (var v in e) @@ -515,7 +513,7 @@ public static long WriteULongStream(this BinaryWriter writer, IEnumerable e) + public static long WriteSingleStream(this BinaryWriter writer, IEnumerable e) { long c = 0; foreach (var v in e) @@ -526,7 +524,7 @@ public static long WriteSingleStream(this BinaryWriter writer, IEnumerable e) + public static long WriteDoubleStream(this BinaryWriter writer, IEnumerable e) { long c = 0; foreach (var v in e) @@ -606,12 +604,12 @@ public static bool ReadBoolByte(this BinaryReader 
reader) return b != 0; } - public static Float ReadFloat(this BinaryReader reader) + public static float ReadFloat(this BinaryReader reader) { return reader.ReadSingle(); } - public static Float[] ReadFloatArray(this BinaryReader reader) + public static float[] ReadFloatArray(this BinaryReader reader) { Contracts.AssertValue(reader); @@ -620,16 +618,16 @@ public static Float[] ReadFloatArray(this BinaryReader reader) return ReadFloatArray(reader, size); } - public static Float[] ReadFloatArray(this BinaryReader reader, int size) + public static float[] ReadFloatArray(this BinaryReader reader, int size) { Contracts.AssertValue(reader); Contracts.Assert(size >= 0); if (size == 0) return null; - var values = new Float[size]; + var values = new float[size]; - long bufferSizeInBytes = (long)size * sizeof(Float); + long bufferSizeInBytes = (long)size * sizeof(float); if (bufferSizeInBytes < _bulkReadThresholdInBytes) { for (int i = 0; i < size; i++) @@ -649,14 +647,14 @@ public static Float[] ReadFloatArray(this BinaryReader reader, int size) return values; } - public static void ReadFloatArray(this BinaryReader reader, Float[] array, int start, int count) + public static void ReadFloatArray(this BinaryReader reader, float[] array, int start, int count) { Contracts.AssertValue(reader); Contracts.AssertValue(array); Contracts.Assert(0 <= start && start < array.Length); Contracts.Assert(0 < count && count <= array.Length - start); - long bufferReadLengthInBytes = (long)count * sizeof(Float); + long bufferReadLengthInBytes = (long)count * sizeof(float); if (bufferReadLengthInBytes < _bulkReadThresholdInBytes) { for (int i = 0; i < count; i++) @@ -668,15 +666,15 @@ public static void ReadFloatArray(this BinaryReader reader, Float[] array, int s { fixed (void* dst = array) { - long bufferBeginOffsetInBytes = (long)start * sizeof(Float); - long bufferSizeInBytes = ((long)array.Length - start) * sizeof(Float); + long bufferBeginOffsetInBytes = (long)start * sizeof(float); + 
long bufferSizeInBytes = ((long)array.Length - start) * sizeof(float); ReadBytes(reader, (byte*)dst + bufferBeginOffsetInBytes, bufferSizeInBytes, bufferReadLengthInBytes); } } } } - public static Single[] ReadSingleArray(this BinaryReader reader) + public static float[] ReadSingleArray(this BinaryReader reader) { Contracts.AssertValue(reader); int size = reader.ReadInt32(); @@ -684,15 +682,15 @@ public static Single[] ReadSingleArray(this BinaryReader reader) return ReadSingleArray(reader, size); } - public static Single[] ReadSingleArray(this BinaryReader reader, int size) + public static float[] ReadSingleArray(this BinaryReader reader, int size) { Contracts.AssertValue(reader); Contracts.Assert(size >= 0); if (size == 0) return null; - var values = new Single[size]; + var values = new float[size]; - long bufferSizeInBytes = (long)size * sizeof(Single); + long bufferSizeInBytes = (long)size * sizeof(float); if (bufferSizeInBytes < _bulkReadThresholdInBytes) { for (int i = 0; i < size; i++) @@ -712,7 +710,7 @@ public static Single[] ReadSingleArray(this BinaryReader reader, int size) return values; } - public static Double[] ReadDoubleArray(this BinaryReader reader) + public static double[] ReadDoubleArray(this BinaryReader reader) { Contracts.AssertValue(reader); @@ -721,15 +719,15 @@ public static Double[] ReadDoubleArray(this BinaryReader reader) return ReadDoubleArray(reader, size); } - public static Double[] ReadDoubleArray(this BinaryReader reader, int size) + public static double[] ReadDoubleArray(this BinaryReader reader, int size) { Contracts.AssertValue(reader); Contracts.Assert(size >= 0); if (size == 0) return null; - var values = new Double[size]; + var values = new double[size]; - long bufferSizeInBytes = (long)size * sizeof(Double); + long bufferSizeInBytes = (long)size * sizeof(double); if (bufferSizeInBytes < _bulkReadThresholdInBytes) { for (int i = 0; i < size; i++) diff --git a/src/Microsoft.ML.Data/Commands/ScoreCommand.cs 
b/src/Microsoft.ML.Data/Commands/ScoreCommand.cs index 610c4ee25f..301603b14a 100644 --- a/src/Microsoft.ML.Data/Commands/ScoreCommand.cs +++ b/src/Microsoft.ML.Data/Commands/ScoreCommand.cs @@ -301,10 +301,10 @@ public static TScorerFactory GetScorerComponent( Contracts.AssertValue(mapper); ComponentCatalog.LoadableClassInfo info = null; - DvText scoreKind = default; + ReadOnlyMemory scoreKind = default; if (mapper.OutputSchema.ColumnCount > 0 && mapper.OutputSchema.TryGetMetadata(TextType.Instance, MetadataUtils.Kinds.ScoreColumnKind, 0, ref scoreKind) && - scoreKind.HasChars) + !scoreKind.IsEmpty) { var loadName = scoreKind.ToString(); info = ComponentCatalog.GetLoadableClassInfo(loadName); diff --git a/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs b/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs index 305dadd4f2..46bb704b6f 100644 --- a/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs +++ b/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs @@ -132,7 +132,7 @@ private static void PrintSchema(TextWriter writer, Arguments args, ISchema schem var itw = IndentingTextWriter.Wrap(writer); using (itw.Nest()) { - var names = default(VBuffer); + var names = default(VBuffer>); for (int col = 0; col < colLim; col++) { var name = schema.GetColumnName(col); @@ -171,7 +171,7 @@ private static void PrintSchema(TextWriter writer, Arguments args, ISchema schem bool verbose = args.Verbose ?? 
false; foreach (var kvp in names.Items(all: verbose)) { - if (verbose || kvp.Value.HasChars) + if (verbose || !kvp.Value.IsEmpty) itw.WriteLine("{0}:{1}", kvp.Key, kvp.Value); } } diff --git a/src/Microsoft.ML.Data/Commands/TrainCommand.cs b/src/Microsoft.ML.Data/Commands/TrainCommand.cs index 431681ec2a..acc03b743f 100644 --- a/src/Microsoft.ML.Data/Commands/TrainCommand.cs +++ b/src/Microsoft.ML.Data/Commands/TrainCommand.cs @@ -443,7 +443,6 @@ public static bool AddNormalizerIfNeeded(IHostEnvironment env, IChannel ch, ITra { if (autoNorm != NormalizeOption.Yes) { - DvBool isNormalized = DvBool.False; if (!trainer.Info.NeedNormalization || schema.IsNormalized(featCol)) { ch.Info("Not adding a normalizer."); diff --git a/src/Microsoft.ML.Data/Data/BufferBuilder.cs b/src/Microsoft.ML.Data/Data/BufferBuilder.cs index 1c0e7cde08..b5f20eac5a 100644 --- a/src/Microsoft.ML.Data/Data/BufferBuilder.cs +++ b/src/Microsoft.ML.Data/Data/BufferBuilder.cs @@ -89,8 +89,8 @@ private void AssertValid() public static BufferBuilder CreateDefault() { - if (typeof(T) == typeof(DvText)) - return (BufferBuilder)(object)new BufferBuilder(TextCombiner.Instance); + if (typeof(T) == typeof(ReadOnlyMemory)) + return (BufferBuilder)(object)new BufferBuilder>(TextCombiner.Instance); if (typeof(T) == typeof(float)) return (BufferBuilder)(object)new BufferBuilder(FloatAdder.Instance); throw Contracts.Except($"Unrecognized type '{typeof(T)}' for default {nameof(BufferBuilder)}"); diff --git a/src/Microsoft.ML.Data/Data/Combiner.cs b/src/Microsoft.ML.Data/Data/Combiner.cs index ee45aee3e3..6335620b8b 100644 --- a/src/Microsoft.ML.Data/Data/Combiner.cs +++ b/src/Microsoft.ML.Data/Data/Combiner.cs @@ -19,7 +19,7 @@ public abstract class Combiner public abstract void Combine(ref T dst, T src); } - public sealed class TextCombiner : Combiner + public sealed class TextCombiner : Combiner> { private static volatile TextCombiner _instance; public static TextCombiner Instance @@ -36,8 +36,8 @@ private 
TextCombiner() { } - public override bool IsDefault(DvText value) { return value.Length == 0; } - public override void Combine(ref DvText dst, DvText src) + public override bool IsDefault(ReadOnlyMemory value) { return value.Length == 0; } + public override void Combine(ref ReadOnlyMemory dst, ReadOnlyMemory src) { Contracts.Check(IsDefault(dst)); dst = src; diff --git a/src/Microsoft.ML.Data/Data/Conversion.cs b/src/Microsoft.ML.Data/Data/Conversion.cs index 0a9833064a..1f08d63fc3 100644 --- a/src/Microsoft.ML.Data/Data/Conversion.cs +++ b/src/Microsoft.ML.Data/Data/Conversion.cs @@ -14,22 +14,18 @@ namespace Microsoft.ML.Runtime.Data.Conversion { - using BL = DvBool; - using DT = DvDateTime; - using DZ = DvDateTimeZone; - using I1 = DvInt1; - using I2 = DvInt2; - using I4 = DvInt4; - using I8 = DvInt8; + using BL = Boolean; + using DT = DateTime; + using DZ = DateTimeOffset; using R4 = Single; using R8 = Double; - using RawI1 = SByte; - using RawI2 = Int16; - using RawI4 = Int32; - using RawI8 = Int64; + using I1 = SByte; + using I2 = Int16; + using I4 = Int32; + using I8 = Int64; using SB = StringBuilder; - using TS = DvTimeSpan; - using TX = DvText; + using TX = ReadOnlyMemory; + using TS = TimeSpan; using U1 = Byte; using U2 = UInt16; using U4 = UInt32; @@ -244,41 +240,14 @@ private Conversions() AddStd(Convert); AddAux(Convert); - AddIsNA(IsNA); - AddIsNA(IsNA); - AddIsNA(IsNA); - AddIsNA(IsNA); AddIsNA(IsNA); AddIsNA(IsNA); - AddIsNA(IsNA); - AddIsNA(IsNA); - AddIsNA(IsNA); - AddIsNA
(IsNA); - AddIsNA(IsNA); - - AddGetNA(GetNA); - AddGetNA(GetNA); - AddGetNA(GetNA); - AddGetNA(GetNA); + AddGetNA(GetNA); AddGetNA(GetNA); - AddGetNA(GetNA); - AddGetNA(GetNA); - AddGetNA(GetNA); - AddGetNA
(GetNA); - AddGetNA(GetNA); - - AddHasNA(HasNA); - AddHasNA(HasNA); - AddHasNA(HasNA); - AddHasNA(HasNA); + AddHasNA(HasNA); AddHasNA(HasNA); - AddHasNA(HasNA); - AddHasNA(HasNA); - AddHasNA(HasNA); - AddHasNA
(HasNA); - AddHasNA(HasNA); AddIsDef(IsDefault); AddIsDef(IsDefault); @@ -533,7 +502,7 @@ public bool TryGetStringConversion(ColumnType type, out ValueMapper(out ValueMapper conv) { DataKind kindSrc; - if (!_kinds.TryGetValue(typeof (TSrc), out kindSrc)) + if (!_kinds.TryGetValue(typeof(TSrc), out kindSrc)) { conv = null; return false; @@ -846,42 +815,24 @@ public ValueGetter GetNAOrDefaultGetter(ColumnType type) // The IsNA methods are for efficient delegates (instance instead of static). #region IsNA - private bool IsNA(ref I1 src) => src.IsNA; - private bool IsNA(ref I2 src) => src.IsNA; - private bool IsNA(ref I4 src) => src.IsNA; - private bool IsNA(ref I8 src) => src.IsNA; - private bool IsNA(ref R4 src) => src.IsNA(); - private bool IsNA(ref R8 src) => src.IsNA(); - private bool IsNA(ref BL src) => src.IsNA; - private bool IsNA(ref TS src) => src.IsNA; - private bool IsNA(ref DT src) => src.IsNA; - private bool IsNA(ref DZ src) => src.IsNA; - private bool IsNA(ref TX src) => src.IsNA; + private bool IsNA(ref R4 src) => R4.IsNaN(src); + private bool IsNA(ref R8 src) => R8.IsNaN(src); #endregion IsNA #region HasNA - private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } - private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } - private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } - private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } - private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA()) return true; } return false; } - private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA()) return true; } return false; } - private bool HasNA(ref VBuffer src) { for (int i = 0; i < 
src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } - private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } - private bool HasNA(ref VBuffer
src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } - private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } - private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } + private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (R4.IsNaN(src.Values[i])) return true; } return false; } + private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (R8.IsNaN(src.Values[i])) return true; } return false; } #endregion HasNA #region IsDefault - private bool IsDefault(ref I1 src) => src.RawValue == 0; - private bool IsDefault(ref I2 src) => src.RawValue == 0; - private bool IsDefault(ref I4 src) => src.RawValue == 0; - private bool IsDefault(ref I8 src) => src.RawValue == 0; + private bool IsDefault(ref I1 src) => src == default(I1); + private bool IsDefault(ref I2 src) => src == default(I2); + private bool IsDefault(ref I4 src) => src == default(I4); + private bool IsDefault(ref I8 src) => src == default(I8); private bool IsDefault(ref R4 src) => src == 0; private bool IsDefault(ref R8 src) => src == 0; private bool IsDefault(ref TX src) => src.IsEmpty; - private bool IsDefault(ref BL src) => src.IsFalse; + private bool IsDefault(ref BL src) => src == default; private bool IsDefault(ref U1 src) => src == 0; private bool IsDefault(ref U2 src) => src == 0; private bool IsDefault(ref U4 src) => src == 0; @@ -900,17 +851,8 @@ public ValueGetter GetNAOrDefaultGetter(ColumnType type) #endregion HasZero #region GetNA - private void GetNA(ref I1 value) => value = I1.NA; - private void GetNA(ref I2 value) => value = I2.NA; - private void GetNA(ref I4 value) => value = I4.NA; - private void GetNA(ref I8 value) => value = I8.NA; private void GetNA(ref R4 value) => value = R4.NaN; private void GetNA(ref R8 value) => value = R8.NaN; - private void 
GetNA(ref BL value) => value = BL.NA; - private void GetNA(ref TS value) => value = TS.NA; - private void GetNA(ref DT value) => value = DT.NA; - private void GetNA(ref DZ value) => value = DZ.NA; - private void GetNA(ref TX value) => value = TX.NA; #endregion GetNA #region ToI1 @@ -1022,28 +964,28 @@ public ValueGetter GetNAOrDefaultGetter(ColumnType type) #endregion ToR8 #region ToStringBuilder - public void Convert(ref I1 src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.Append(src.RawValue); } - public void Convert(ref I2 src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.Append(src.RawValue); } - public void Convert(ref I4 src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.Append(src.RawValue); } - public void Convert(ref I8 src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.Append(src.RawValue); } + public void Convert(ref I1 src, ref SB dst) { ClearDst(ref dst); dst.Append(src); } + public void Convert(ref I2 src, ref SB dst) { ClearDst(ref dst); dst.Append(src); } + public void Convert(ref I4 src, ref SB dst) { ClearDst(ref dst); dst.Append(src); } + public void Convert(ref I8 src, ref SB dst) { ClearDst(ref dst); dst.Append(src); } public void Convert(ref U1 src, ref SB dst) => ClearDst(ref dst).Append(src); public void Convert(ref U2 src, ref SB dst) => ClearDst(ref dst).Append(src); public void Convert(ref U4 src, ref SB dst) => ClearDst(ref dst).Append(src); public void Convert(ref U8 src, ref SB dst) => ClearDst(ref dst).Append(src); public void Convert(ref UG src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("0x{0:x16}{1:x16}", src.Hi, src.Lo); } - public void Convert(ref R4 src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA()) dst.AppendFormat(CultureInfo.InvariantCulture, "{0:R}", src); } - public void Convert(ref R8 src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA()) dst.AppendFormat(CultureInfo.InvariantCulture, "{0:G17}", src); } + public void Convert(ref R4 src, ref SB dst) { ClearDst(ref dst); if 
(R4.IsNaN(src)) dst.AppendFormat(CultureInfo.InvariantCulture, "{0}", "?"); else dst.AppendFormat(CultureInfo.InvariantCulture, "{0:R}", src); } + public void Convert(ref R8 src, ref SB dst) { ClearDst(ref dst); if (R8.IsNaN(src)) dst.AppendFormat(CultureInfo.InvariantCulture, "{0}", "?"); else dst.AppendFormat(CultureInfo.InvariantCulture, "{0:G17}", src); } public void Convert(ref BL src, ref SB dst) { ClearDst(ref dst); - if (src.IsFalse) + if (!src) dst.Append("0"); - else if (src.IsTrue) + else dst.Append("1"); } - public void Convert(ref TS src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.AppendFormat("{0:c}", (TimeSpan)src); } - public void Convert(ref DT src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.AppendFormat("{0:o}", (DateTime)src); } - public void Convert(ref DZ src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.AppendFormat("{0:o}", (DateTimeOffset)src); } + public void Convert(ref TS src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("{0:c}", src); } + public void Convert(ref DT src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("{0:o}", src); } + public void Convert(ref DZ src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("{0:o}", src); } #endregion ToStringBuilder #region FromR4 @@ -1108,16 +1050,13 @@ public bool TryParse(ref TX src, out U4 dst) ///
public bool TryParse(ref TX src, out U8 dst) { - if (src.IsNA) + if (src.IsEmpty) { dst = 0; return false; } - int ichMin; - int ichLim; - string text = src.GetRawUnderlyingBufferInfo(out ichMin, out ichLim); - return TryParseCore(text, ichMin, ichLim, out dst); + return TryParseCore(src.Span, out dst); } /// @@ -1130,16 +1069,15 @@ public bool TryParse(ref TX src, out U8 dst) /// and had only digits and the letters 'a' through 'f' or 'A' through 'F' as characters public bool TryParse(ref TX src, out UG dst) { + var span = src.Span; // REVIEW: Accomodate numeric inputs? - if (src.Length != 34 || src[0] != '0' || (src[1] != 'x' && src[1] != 'X')) + if (src.Length != 34 || span[0] != '0' || (span[1] != 'x' && span[1] != 'X')) { dst = default(UG); return false; } - int ichMin; - int ichLim; - string tx = src.GetRawUnderlyingBufferInfo(out ichMin, out ichLim); - int offset = ichMin + 2; + + int offset = 2; ulong hi = 0; ulong num = 0; for (int i = 0; i < 2; ++i) @@ -1147,7 +1085,7 @@ public bool TryParse(ref TX src, out UG dst) for (int d = 0; d < 16; ++d) { num <<= 4; - char c = tx[offset++]; + char c = span[offset++]; // REVIEW: An exhaustive switch statement *might* be faster, maybe, at the // cost of being significantly longer. if ('0' <= c && c <= '9') @@ -1168,7 +1106,7 @@ public bool TryParse(ref TX src, out UG dst) num = 0; } } - Contracts.Assert(offset == ichLim); + Contracts.Assert(offset == src.Length); // The first read bits are the higher order bits, so they are listed second here. dst = new UG(num, hi); return true; @@ -1181,44 +1119,44 @@ public bool TryParse(ref TX src, out UG dst) /// The standard representations are any casing of: /// ? 
NaN NA N/A /// - private bool IsStdMissing(ref TX src) + private bool IsStdMissing(ref ReadOnlySpan span) { - Contracts.Assert(src.HasChars); + Contracts.Assert(!span.IsEmpty); char ch; - switch (src.Length) + switch (span.Length) { - default: - return false; - - case 1: - if (src[0] == '?') - return true; - return false; - case 2: - if ((ch = src[0]) != 'N' && ch != 'n') - return false; - if ((ch = src[1]) != 'A' && ch != 'a') + default: return false; - return true; - case 3: - if ((ch = src[0]) != 'N' && ch != 'n') + + case 1: + if (span[0] == '?') + return true; return false; - if ((ch = src[1]) == '/') - { - // Check for N/A. - if ((ch = src[2]) != 'A' && ch != 'a') + case 2: + if ((ch = span[0]) != 'N' && ch != 'n') return false; - } - else - { - // Check for NaN. - if (ch != 'a' && ch != 'A') + if ((ch = span[1]) != 'A' && ch != 'a') return false; - if ((ch = src[2]) != 'N' && ch != 'n') + return true; + case 3: + if ((ch = span[0]) != 'N' && ch != 'n') return false; - } - return true; + if ((ch = span[1]) == '/') + { + // Check for N/A. + if ((ch = span[2]) != 'A' && ch != 'a') + return false; + } + else + { + // Check for NaN. + if (ch != 'a' && ch != 'A') + return false; + if ((ch = span[2]) != 'N' && ch != 'n') + return false; + } + return true; } } @@ -1226,11 +1164,13 @@ private bool IsStdMissing(ref TX src) /// Utility to assist in parsing key-type values. The min and max values define /// the legal input value bounds. The output dst value is "normalized" so min is /// mapped to 1, max is mapped to 1 + (max - min). - /// Missing values are mapped to zero with a true return. + /// Exception is thrown for missing values. /// Unparsable or out of range values are mapped to zero with a false return. ///
public bool TryParseKey(ref TX src, U8 min, U8 max, out U8 dst) { + var span = src.Span; + Contracts.Check(!IsStdMissing(ref span), "Missing text value cannot be converted to unsigned integer type."); Contracts.Assert(min <= max); // This simply ensures we don't have min == 0 and max == U8.MaxValue. This is illegal since @@ -1240,22 +1180,19 @@ public bool TryParseKey(ref TX src, U8 min, U8 max, out U8 dst) // Both empty and missing map to zero (NA for key values) and that mapping is valid, // hence the true return. - if (!src.HasChars) + if (src.IsEmpty) { dst = 0; return true; } // Parse a ulong. - int ichMin; - int ichLim; - string text = src.GetRawUnderlyingBufferInfo(out ichMin, out ichLim); ulong uu; - if (!TryParseCore(text, ichMin, ichLim, out uu)) + if (!TryParseCore(span, out uu)) { dst = 0; // Return true only for standard forms for NA. - return IsStdMissing(ref src); + return false; } if (min > uu || uu > max) @@ -1268,14 +1205,13 @@ public bool TryParseKey(ref TX src, U8 min, U8 max, out U8 dst) return true; } - private bool TryParseCore(string text, int ich, int lim, out ulong dst) + private bool TryParseCore(ReadOnlySpan span, out ulong dst) { - Contracts.Assert(0 <= ich && ich <= lim && lim <= Utils.Size(text)); - ulong res = 0; - while (ich < lim) + int ich = 0; + while (ich < span.Length) { - uint d = (uint)text[ich++] - (uint)'0'; + uint d = (uint)span[ich++] - (uint)'0'; if (d >= 10) goto LFail; @@ -1301,71 +1237,70 @@ private bool TryParseCore(string text, int ich, int lim, out ulong dst) /// /// This produces zero for empty. It returns false if the text is not parsable or overflows. - /// On failure, it sets dst to the NA value. + /// On failure, it sets dst to the default value. 
/// public bool TryParse(ref TX src, out I1 dst) { - long res; - bool f = TryParseSigned(RawI1.MaxValue, ref src, out res); - Contracts.Assert(f || res == I1.RawNA); - Contracts.Assert((RawI1)res == res); - dst = (RawI1)res; - return f; + dst = default; + TryParseSigned(I1.MaxValue, ref src, out long? res); + Contracts.Check(res.HasValue, "Value could not be parsed from text to sbyte."); + Contracts.Check((I1)res == res, "Overflow or underflow occured while converting value in text to sbyte."); + dst = (I1)res; + return true; } /// /// This produces zero for empty. It returns false if the text is not parsable or overflows. - /// On failure, it sets dst to the NA value. + /// On failure, it sets dst to the default value. /// public bool TryParse(ref TX src, out I2 dst) { - long res; - bool f = TryParseSigned(RawI2.MaxValue, ref src, out res); - Contracts.Assert(f || res == I2.RawNA); - Contracts.Assert((RawI2)res == res); - dst = (RawI2)res; - return f; + dst = default; + TryParseSigned(I2.MaxValue, ref src, out long? res); + Contracts.Check(res.HasValue, "Value could not be parsed from text to short."); + Contracts.Check((I2)res == res, "Overflow or underflow occured while converting value in text to short."); + dst = (I2)res; + return true; } /// /// This produces zero for empty. It returns false if the text is not parsable or overflows. - /// On failure, it sets dst to the NA value. + /// On failure, it sets dst to the default value. /// public bool TryParse(ref TX src, out I4 dst) { - long res; - bool f = TryParseSigned(RawI4.MaxValue, ref src, out res); - Contracts.Assert(f || res == I4.RawNA); - Contracts.Assert((RawI4)res == res); - dst = (RawI4)res; - return f; + dst = default; + TryParseSigned(I4.MaxValue, ref src, out long? 
res); + Contracts.Check(res.HasValue, "Value could not be parsed from text to int32."); + Contracts.Check((I4)res == res, "Overflow or underflow occured while converting value in text to int."); + dst = (I4)res; + return true; } /// /// This produces zero for empty. It returns false if the text is not parsable or overflows. - /// On failure, it sets dst to the NA value. + /// On failure, it sets dst to the default value. /// public bool TryParse(ref TX src, out I8 dst) { - long res; - bool f = TryParseSigned(RawI8.MaxValue, ref src, out res); - Contracts.Assert(f || res == I8.RawNA); - dst = res; - return f; + dst = default; + TryParseSigned(I8.MaxValue, ref src, out long? res); + Contracts.Check(res.HasValue, "Value could not be parsed from text to long."); + dst = (I8)res; + return true; } /// /// Returns false if the text is not parsable as an non-negative long or overflows. /// - private bool TryParseNonNegative(string text, int ich, int lim, out long result) + private bool TryParseNonNegative(ReadOnlySpan span, out long result) { - Contracts.Assert(0 <= ich && ich <= lim && lim <= Utils.Size(text)); - long res = 0; - while (ich < lim) + int ich = 0; + while (ich < span.Length) { Contracts.Assert(res >= 0); - uint d = (uint)text[ich++] - (uint)'0'; + uint d = (uint)span[ich++] - (uint)'0'; if (d >= 10) goto LFail; @@ -1389,61 +1324,53 @@ private bool TryParseNonNegative(string text, int ich, int lim, out long result) /// /// This produces zero for empty. It returns false if the text is not parsable as a signed integer - /// or the result overflows. The min legal value is -max. The NA value is -max - 1. + /// or the result overflows. The min legal value is -max - 1. The NA value is null. /// When it returns false, result is set to the NA value. The result can be NA on true return, /// since some representations of NA are not considered parse failure. 
/// - private bool TryParseSigned(long max, ref TX span, out long result) + private void TryParseSigned(long max, ref TX text, out long? result) { Contracts.Assert(max > 0); Contracts.Assert((max & (max + 1)) == 0); - if (!span.HasChars) + if (text.IsEmpty) { - if (span.IsNA) - result = -max - 1; - else - result = 0; - return true; + result = default(long); + return; } - int ichMin; - int ichLim; - string text = span.GetRawUnderlyingBufferInfo(out ichMin, out ichLim); - - long val; + ulong val; + var span = text.Span; if (span[0] == '-') { - if (span.Length == 1 || - !TryParseNonNegative(text, ichMin + 1, ichLim, out val) || - val > max) + if (span.Length == 1 || !TryParseCore(span.Slice(1), out val) || (val > ((ulong)max + 1))) { - result = -max - 1; - return false; + result = null; + return; } Contracts.Assert(val >= 0); result = -(long)val; - Contracts.Assert(long.MinValue < result && result <= 0); - return true; + Contracts.Assert(long.MinValue <= result && result <= 0); + return; } - if (!TryParseNonNegative(text, ichMin, ichLim, out val)) + long sVal; + if (!TryParseNonNegative(span, out sVal)) { - // Check for acceptable NA forms: ? NaN NA and N/A. 
- result = -max - 1; - return IsStdMissing(ref span); + result = null; + return; } - Contracts.Assert(val >= 0); - if (val > max) + Contracts.Assert(sVal >= 0); + if (sVal > max) { - result = -max - 1; - return false; + result = null; + return; } - result = (long)val; + result = (long)sVal; Contracts.Assert(0 <= result && result <= long.MaxValue); - return true; + return; } /// @@ -1452,10 +1379,11 @@ private bool TryParseSigned(long max, ref TX span, out long result) /// public bool TryParse(ref TX src, out R4 dst) { - if (src.TryParse(out dst)) + var span = src.Span; + if (DoubleParser.TryParse(span, out dst)) return true; dst = R4.NaN; - return IsStdMissing(ref src); + return IsStdMissing(ref span); } /// @@ -1464,108 +1392,90 @@ public bool TryParse(ref TX src, out R4 dst) /// public bool TryParse(ref TX src, out R8 dst) { - if (src.TryParse(out dst)) + var span = src.Span; + if (DoubleParser.TryParse(span, out dst)) return true; dst = R8.NaN; - return IsStdMissing(ref src); + return IsStdMissing(ref span); } public bool TryParse(ref TX src, out TS dst) { - if (!src.HasChars) + if (src.IsEmpty) { - if (src.IsNA) - dst = TS.NA; - else - dst = default(TS); + dst = default; return true; } - TimeSpan res; - if (TimeSpan.TryParse(src.ToString(), CultureInfo.InvariantCulture, out res)) - { - dst = new TS(res); + + if (TimeSpan.TryParse(src.ToString(), CultureInfo.InvariantCulture, out dst)) return true; - } - dst = TS.NA; - return IsStdMissing(ref src); + var span = src.Span; + Contracts.Check(!IsStdMissing(ref span), "Missing values cannot be converted to boolean value."); + return true; } public bool TryParse(ref TX src, out DT dst) { - if (!src.HasChars) + if (src.IsEmpty) { - if (src.IsNA) - dst = DvDateTime.NA; - else - dst = default(DvDateTime); + dst = default; return true; } - DateTime res; - if (DateTime.TryParse(src.ToString(), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out res)) - { - dst = new DT(res); 
+ + if (DateTime.TryParse(src.ToString(), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out dst)) return true; - } - dst = DvDateTime.NA; - return IsStdMissing(ref src); + + var span = src.Span; + Contracts.Check(!IsStdMissing(ref span), "Missing values cannot be converted to boolean value."); + return true; } public bool TryParse(ref TX src, out DZ dst) { - if (!src.HasChars) + if (src.IsEmpty) { - if (src.IsNA) - dst = DvDateTimeZone.NA; - else - dst = default(DvDateTimeZone); + dst = default; return true; } - DateTimeOffset res; - if (DateTimeOffset.TryParse(src.ToString(), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out res)) - { - dst = new DZ(res); + + if (DateTimeOffset.TryParse(src.ToString(), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out dst)) return true; - } - dst = DvDateTimeZone.NA; - return IsStdMissing(ref src); + + var span = src.Span; + Contracts.Check(!IsStdMissing(ref span), "Missing values cannot be converted to boolean value."); + return true; } - // These map unparsable and overflow values to "NA", which is the value Ix.MinValue. Note that this NA - // value is the "evil" value - the non-zero value, x, such that x == -x. Note also, that for I4, this - // matches R's representation of NA. + // These throw an exception for unparsable and overflow values. private I1 ParseI1(ref TX src) { - long res; - bool f = TryParseSigned(RawI1.MaxValue, ref src, out res); - Contracts.Assert(f || res == I1.RawNA); - Contracts.Assert((RawI1)res == res); - return (RawI1)res; + TryParseSigned(I1.MaxValue, ref src, out long? 
res); + Contracts.Check(res.HasValue, "Value could not be parsed from text to sbyte."); + Contracts.Check((I1)res == res, "Overflow or underflow occured while converting value in text to sbyte."); + return (I1)res; } private I2 ParseI2(ref TX src) { - long res; - bool f = TryParseSigned(RawI2.MaxValue, ref src, out res); - Contracts.Assert(f || res == I2.RawNA); - Contracts.Assert((RawI2)res == res); - return (RawI2)res; + TryParseSigned(I2.MaxValue, ref src, out long? res); + Contracts.Check(res.HasValue, "Value could not be parsed from text to short."); + Contracts.Check((I2)res == res, "Overflow or underflow occured while converting value in text to short."); + return (I2)res; } private I4 ParseI4(ref TX src) { - long res; - bool f = TryParseSigned(RawI4.MaxValue, ref src, out res); - Contracts.Assert(f || res == I4.RawNA); - Contracts.Assert((RawI4)res == res); - return (RawI4)res; + TryParseSigned(I4.MaxValue, ref src, out long? res); + Contracts.Check(res.HasValue, "Value could not be parsed from text to int."); + Contracts.Check((I4)res == res, "Overflow or underflow occured while converting value in text to int."); + return (I4)res; } private I8 ParseI8(ref TX src) { - long res; - bool f = TryParseSigned(RawI8.MaxValue, ref src, out res); - Contracts.Assert(f || res == I8.RawNA); - return res; + TryParseSigned(I8.MaxValue, ref src, out long? res); + Contracts.Check(res.HasValue, "Value could not be parsed from text to long."); + return res.Value; } // These map unparsable and overflow values to zero. The unsigned integer types do not have an NA value. @@ -1618,116 +1528,113 @@ private U8 ParseU8(ref TX span) ///
public bool TryParse(ref TX src, out BL dst) { - // NA text fails. - if (src.IsNA) - { - dst = BL.NA; - return true; - } + var span = src.Span; + + Contracts.Check(!IsStdMissing(ref span), "Missing text values cannot be converted to bool value."); char ch; switch (src.Length) { - case 0: - // Empty succeeds and maps to false. - dst = BL.False; - return true; - - case 1: - switch (src[0]) - { - case 'T': - case 't': - case 'Y': - case 'y': - case '1': - case '+': - dst = BL.True; - return true; - case 'F': - case 'f': - case 'N': - case 'n': - case '0': - case '-': - dst = BL.False; + case 0: + // Empty succeeds and maps to false. + dst = false; return true; - } - break; - case 2: - switch (src[0]) - { - case 'N': - case 'n': - if ((ch = src[1]) != 'O' && ch != 'o') - break; - dst = BL.False; - return true; - case '+': - if ((ch = src[1]) != '1') - break; - dst = BL.True; - return true; - case '-': - if ((ch = src[1]) != '1') - break; - dst = BL.False; - return true; - } - break; + case 1: + switch (span[0]) + { + case 'T': + case 't': + case 'Y': + case 'y': + case '1': + case '+': + dst = true; + return true; + case 'F': + case 'f': + case 'N': + case 'n': + case '0': + case '-': + dst = false; + return true; + } + break; - case 3: - switch (src[0]) - { - case 'Y': - case 'y': - if ((ch = src[1]) != 'E' && ch != 'e') - break; - if ((ch = src[2]) != 'S' && ch != 's') - break; - dst = BL.True; - return true; - } - break; + case 2: + switch (span[0]) + { + case 'N': + case 'n': + if ((ch = span[1]) != 'O' && ch != 'o') + break; + dst = false; + return true; + case '+': + if ((ch = span[1]) != '1') + break; + dst = true; + return true; + case '-': + if ((ch = span[1]) != '1') + break; + dst = false; + return true; + } + break; - case 4: - switch (src[0]) - { - case 'T': - case 't': - if ((ch = src[1]) != 'R' && ch != 'r') - break; - if ((ch = src[2]) != 'U' && ch != 'u') - break; - if ((ch = src[3]) != 'E' && ch != 'e') - break; - dst = BL.True; - return true; - } - 
break; + case 3: + switch (span[0]) + { + case 'Y': + case 'y': + if ((ch = span[1]) != 'E' && ch != 'e') + break; + if ((ch = span[2]) != 'S' && ch != 's') + break; + dst = true; + return true; + } + break; - case 5: - switch (src[0]) - { - case 'F': - case 'f': - if ((ch = src[1]) != 'A' && ch != 'a') - break; - if ((ch = src[2]) != 'L' && ch != 'l') - break; - if ((ch = src[3]) != 'S' && ch != 's') - break; - if ((ch = src[4]) != 'E' && ch != 'e') - break; - dst = BL.False; - return true; - } - break; + case 4: + switch (span[0]) + { + case 'T': + case 't': + if ((ch = span[1]) != 'R' && ch != 'r') + break; + if ((ch = span[2]) != 'U' && ch != 'u') + break; + if ((ch = span[3]) != 'E' && ch != 'e') + break; + dst = true; + return true; + } + break; + + case 5: + switch (span[0]) + { + case 'F': + case 'f': + if ((ch = span[1]) != 'A' && ch != 'a') + break; + if ((ch = span[2]) != 'L' && ch != 'l') + break; + if ((ch = span[3]) != 'S' && ch != 's') + break; + if ((ch = span[4]) != 'E' && ch != 'e') + break; + dst = false; + return true; + } + break; } - dst = BL.NA; - return IsStdMissing(ref src); + dst = false; + return false; } private bool TryParse(ref TX src, out TX dst) @@ -1773,16 +1680,18 @@ public void Convert(ref TX span, ref UG value) if (!TryParse(ref span, out value)) Contracts.Assert(value.Equals(default(UG))); } - public void Convert(ref TX span, ref R4 value) + public void Convert(ref TX src, ref R4 value) { - if (span.TryParse(out value)) + var span = src.Span; + if (DoubleParser.TryParse(span, out value)) return; // Unparsable is mapped to NA. value = R4.NaN; } - public void Convert(ref TX span, ref R8 value) + public void Convert(ref TX src, ref R8 value) { - if (span.TryParse(out value)) + var span = src.Span; + if (DoubleParser.TryParse(span, out value)) return; // Unparsable is mapped to NA. 
value = R8.NaN; @@ -1791,43 +1700,32 @@ public void Convert(ref TX span, ref TX value) { value = span; } - public void Convert(ref TX span, ref BL value) + public void Convert(ref TX src, ref BL value) { - // When TryParseBL returns false, it should have set value to NA. - if (!TryParse(ref span, out value)) - Contracts.Assert(value.IsNA); + // When TryParseBL returns false, it should have set value to false. + if (!TryParse(ref src, out value)) + Contracts.Assert(!value); } public void Convert(ref TX src, ref SB dst) { ClearDst(ref dst); - if (src.HasChars) - src.AddToStringBuilder(dst); + if (!src.IsEmpty) + dst.AppendMemory(src); } - public void Convert(ref TX span, ref TS value) - { - if (!TryParse(ref span, out value)) - Contracts.Assert(value.IsNA); - } - public void Convert(ref TX span, ref DT value) - { - if (!TryParse(ref span, out value)) - Contracts.Assert(value.IsNA); - } - public void Convert(ref TX span, ref DZ value) - { - if (!TryParse(ref span, out value)) - Contracts.Assert(value.IsNA); - } + public void Convert(ref TX span, ref TS value) => TryParse(ref span, out value); + public void Convert(ref TX span, ref DT value) => TryParse(ref span, out value); + public void Convert(ref TX span, ref DZ value) => TryParse(ref span, out value); + #endregion FromTX #region FromBL - public void Convert(ref BL src, ref I1 dst) => dst = (I1)src; - public void Convert(ref BL src, ref I2 dst) => dst = (I2)src; - public void Convert(ref BL src, ref I4 dst) => dst = (I4)src; - public void Convert(ref BL src, ref I8 dst) => dst = (I8)src; - public void Convert(ref BL src, ref R4 dst) => dst = (R4)src; - public void Convert(ref BL src, ref R8 dst) => dst = (R8)src; + public void Convert(ref BL src, ref I1 dst) => dst = (I1)(object)src; + public void Convert(ref BL src, ref I2 dst) => dst = (I2)(object)src; + public void Convert(ref BL src, ref I4 dst) => dst = (I4)(object)src; + public void Convert(ref BL src, ref I8 dst) => dst = (I8)(object)src; + public void 
Convert(ref BL src, ref R4 dst) => dst = System.Convert.ToSingle(src); + public void Convert(ref BL src, ref R8 dst) => dst = System.Convert.ToDouble(src); public void Convert(ref BL src, ref BL dst) => dst = src; #endregion FromBL } diff --git a/src/Microsoft.ML.Data/Data/DataViewUtils.cs b/src/Microsoft.ML.Data/Data/DataViewUtils.cs index 1db4d5ad0a..17307186fd 100644 --- a/src/Microsoft.ML.Data/Data/DataViewUtils.cs +++ b/src/Microsoft.ML.Data/Data/DataViewUtils.cs @@ -1312,14 +1312,14 @@ public ValueGetter GetGetter(int col) } } - public static ValueGetter[] PopulateGetterArray(IRowCursor cursor, List colIndices) + public static ValueGetter>[] PopulateGetterArray(IRowCursor cursor, List colIndices) { var n = colIndices.Count; - var getters = new ValueGetter[n]; + var getters = new ValueGetter>[n]; for (int i = 0; i < n; i++) { - ValueGetter getter; + ValueGetter> getter; var srcColIndex = colIndices[i]; var colType = cursor.Schema.GetColumnType(srcColIndex); @@ -1340,7 +1340,7 @@ public static ValueGetter[] PopulateGetterArray(IRowCursor cursor, List< return getters; } - public static ValueGetter GetSingleValueGetter(IRow cursor, int i, ColumnType colType) + public static ValueGetter> GetSingleValueGetter(IRow cursor, int i, ColumnType colType) { var floatGetter = cursor.GetGetter(i); T v = default(T); @@ -1359,18 +1359,18 @@ public static ValueGetter GetSingleValueGetter(IRow cursor, int i, Co } StringBuilder dst = null; - ValueGetter getter = - (ref DvText value) => + ValueGetter> getter = + (ref ReadOnlyMemory value) => { floatGetter(ref v); conversion(ref v, ref dst); string text = dst.ToString(); - value = new DvText(text); + value = text.AsMemory(); }; return getter; } - public static ValueGetter GetVectorFlatteningGetter(IRow cursor, int colIndex, ColumnType colType) + public static ValueGetter> GetVectorFlatteningGetter(IRow cursor, int colIndex, ColumnType colType) { var vecGetter = cursor.GetGetter>(colIndex); var vbuf = default(VBuffer); @@ -1378,8 
+1378,8 @@ public static ValueGetter GetVectorFlatteningGetter(IRow cursor, int ValueMapper conversion; Conversions.Instance.TryGetStringConversion(colType, out conversion); StringBuilder dst = null; - ValueGetter getter = - (ref DvText value) => + ValueGetter> getter = + (ref ReadOnlyMemory value) => { vecGetter(ref vbuf); @@ -1393,7 +1393,7 @@ public static ValueGetter GetVectorFlatteningGetter(IRow cursor, int conversion(ref v, ref dst); return dst.ToString(); })); - value = new DvText(string.Format("<{0}{1}>", stringRep, suffix)); + value = string.Format("<{0}{1}>", stringRep, suffix).AsMemory(); }; return getter; } diff --git a/src/Microsoft.ML.Data/Data/RowCursorUtils.cs b/src/Microsoft.ML.Data/Data/RowCursorUtils.cs index 091fe26cb2..9f09e0da51 100644 --- a/src/Microsoft.ML.Data/Data/RowCursorUtils.cs +++ b/src/Microsoft.ML.Data/Data/RowCursorUtils.cs @@ -394,16 +394,16 @@ private static ValueGetter GetLabelGetterNotFloat(IRow cursor, int label Contracts.Assert(type != NumberType.R4 && type != NumberType.R8); - // DvBool type label mapping: True -> 1, False -> 0, NA -> NaN. + // boolean type label mapping: True -> 1, False -> 0. if (type.IsBool) { - var getBoolSrc = cursor.GetGetter(labelIndex); + var getBoolSrc = cursor.GetGetter(labelIndex); return (ref Single dst) => { - DvBool src = DvBool.NA; + bool src = default; getBoolSrc(ref src); - dst = (Single)src; + dst = Convert.ToSingle(src); }; } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs index 7bc0a8d2ad..582212738a 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs @@ -729,7 +729,13 @@ public void GetMetadata(string kind, int col, ref TValue value) /// /// Upper inclusive bound of versions this reader can read. 
/// - private const ulong ReaderVersion = MissingTextVersion; + private const ulong ReaderVersion = StandardDataTypesVersion; + + /// + /// The first version that removes DvTypes and uses .NET standard + /// data types. + /// + private const ulong StandardDataTypesVersion = 0x0001000100010006; /// /// The first version of the format that accomodated DvText.NA. diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/CodecFactory.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/CodecFactory.cs index d04adaf099..544a8c60f5 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/CodecFactory.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/CodecFactory.cs @@ -44,26 +44,28 @@ public CodecFactory(IHostEnvironment env, MemoryStreamPool memPool = null) _loadNameToCodecCreator = new Dictionary(); _simpleCodecTypeMap = new Dictionary(); // Register the current codecs. - RegisterSimpleCodec(new UnsafeTypeCodec(this)); + RegisterSimpleCodec(new UnsafeTypeCodec(this)); RegisterSimpleCodec(new UnsafeTypeCodec(this)); - RegisterSimpleCodec(new UnsafeTypeCodec(this)); + RegisterSimpleCodec(new UnsafeTypeCodec(this)); RegisterSimpleCodec(new UnsafeTypeCodec(this)); - RegisterSimpleCodec(new UnsafeTypeCodec(this)); + RegisterSimpleCodec(new UnsafeTypeCodec(this)); RegisterSimpleCodec(new UnsafeTypeCodec(this)); - RegisterSimpleCodec(new UnsafeTypeCodec(this)); + RegisterSimpleCodec(new UnsafeTypeCodec(this)); RegisterSimpleCodec(new UnsafeTypeCodec(this)); - RegisterSimpleCodec(new UnsafeTypeCodec(this)); - RegisterSimpleCodec(new UnsafeTypeCodec(this)); - RegisterSimpleCodec(new UnsafeTypeCodec(this)); - RegisterSimpleCodec(new DvTextCodec(this)); + RegisterSimpleCodec(new UnsafeTypeCodec(this)); + RegisterSimpleCodec(new UnsafeTypeCodec(this)); + RegisterSimpleCodec(new UnsafeTypeCodec(this)); + RegisterSimpleCodec(new TextCodec(this)); RegisterSimpleCodec(new BoolCodec(this)); RegisterSimpleCodec(new DateTimeCodec(this)); - RegisterSimpleCodec(new DateTimeZoneCodec(this)); + 
RegisterSimpleCodec(new DateTimeOffsetCodec(this)); RegisterSimpleCodec(new UnsafeTypeCodec(this)); - // Register the old boolean reading codec. - var oldBool = new OldBoolCodec(this); - RegisterOtherCodec(oldBool.LoadName, oldBool.GetCodec); + // Register the old type system reading codec. + RegisterOtherCodec("DvBool", new OldBoolCodec(this).GetCodec); + RegisterOtherCodec("DvDateTimeZone", new DateTimeOffsetCodec(this).GetCodec); + RegisterOtherCodec("DvDateTime", new DateTimeCodec(this).GetCodec); + RegisterOtherCodec("DvTimeSpan", new UnsafeTypeCodec(this).GetCodec); RegisterOtherCodec("VBuffer", GetVBufferCodec); RegisterOtherCodec("Key", GetKeyCodec); diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs index f840773872..3e4f997431 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs @@ -179,10 +179,10 @@ public override string LoadName } // Gatekeeper to ensure T is a type that is supported by UnsafeTypeCodec. - // Throws an exception if T is neither a DvTimeSpan nor a NumberType. + // Throws an exception if T is neither a TimeSpan nor a NumberType. private static ColumnType UnsafeColumnType(Type type) { - return type == typeof(DvTimeSpan) ? (ColumnType)TimeSpanType.Instance : NumberType.FromType(type); + return type == typeof(TimeSpan) ? 
(ColumnType)TimeSpanType.Instance : NumberType.FromType(type); } public UnsafeTypeCodec(CodecFactory factory) @@ -305,9 +305,8 @@ public override void Read(T[] values, int index, int count) } } - private sealed class DvTextCodec : SimpleCodec + private sealed class TextCodec : SimpleCodec> { - private const int MissingBit = unchecked((int)0x80000000); private const int LengthMask = unchecked((int)0x7FFFFFFF); public override string LoadName @@ -320,43 +319,38 @@ public override string LoadName // int[entries]: The non-decreasing end-boundary character index array, with high bit set for "missing" values. // string: The UTF-8 encoded string, with the standard LEB128 byte-length preceeding it. - public DvTextCodec(CodecFactory factory) + public TextCodec(CodecFactory factory) : base(factory, TextType.Instance) { } - public override IValueWriter OpenWriter(Stream stream) + public override IValueWriter> OpenWriter(Stream stream) { return new Writer(this, stream); } - public override IValueReader OpenReader(Stream stream, int items) + public override IValueReader> OpenReader(Stream stream, int items) { return new Reader(this, stream, items); } - private sealed class Writer : ValueWriterBase + private sealed class Writer : ValueWriterBase> { private StringBuilder _builder; private List _boundaries; - public Writer(DvTextCodec codec, Stream stream) + public Writer(TextCodec codec, Stream stream) : base(codec.Factory, stream) { _builder = new StringBuilder(); _boundaries = new List(); } - public override void Write(ref DvText value) + public override void Write(ref ReadOnlyMemory value) { Contracts.Check(_builder != null, "writer was already committed"); - if (value.IsNA) - _boundaries.Add(_builder.Length | MissingBit); - else - { - value.AddToStringBuilder(_builder); - _boundaries.Add(_builder.Length); - } + _builder.AppendMemory(value); + _boundaries.Add(_builder.Length); } public override void Commit() @@ -378,14 +372,14 @@ public override long GetCommitLengthEstimate() 
} } - private sealed class Reader : ValueReaderBase + private sealed class Reader : ValueReaderBase> { private readonly int _entries; private readonly int[] _boundaries; private int _index; private string _text; - public Reader(DvTextCodec codec, Stream stream, int items) + public Reader(TextCodec codec, Stream stream, int items) : base(codec.Factory, stream) { _entries = Reader.ReadInt32(); @@ -408,29 +402,34 @@ public override void MoveNext() Contracts.Check(++_index < _entries, "reader already read all values"); } - public override void Get(ref DvText value) + public override void Get(ref ReadOnlyMemory value) { Contracts.Assert(_index < _entries); int b = _boundaries[_index + 1]; - if (b < 0) - value = DvText.NA; + int start = _boundaries[_index] & LengthMask; + if (b >= 0) + value = _text.AsMemory().Slice(start, (b & LengthMask) - start); else - value = new DvText(_text, _boundaries[_index] & LengthMask, b & LengthMask); + { + //For backward compatiblity when NA values existed, treat them + //as empty string. + value = ReadOnlyMemory.Empty; + } } } } /// - /// This is an older boolean code that reads from a form that serialized - /// 1 bit per value. The new encoding (implemented by a different codec) + /// This is a boolean code that reads from a form that serialized + /// 1 bit per value. The old encoding (implemented by a different codec) /// uses 2 bits per value so NA values can be supported. /// - private sealed class OldBoolCodec : SimpleCodec + private sealed class BoolCodec : SimpleCodec { // *** Binary block format *** // Packed bits. 
- public OldBoolCodec(CodecFactory factory) + public BoolCodec(CodecFactory factory) : base(factory, BoolType.Instance) { } @@ -440,24 +439,70 @@ public override string LoadName get { return typeof(bool).Name; } } - public override IValueWriter OpenWriter(Stream stream) + public override IValueWriter OpenWriter(Stream stream) { - Contracts.Assert(false, "This older form only supports reading"); - throw Contracts.ExceptNotSupp("Writing single bit booleans no longer supported"); + return new Writer(this, stream); + } + + private sealed class Writer : ValueWriterBase + { + // Pack 8 values into 8 bits. + private byte _currentBits; + private long _numWritten; + private byte _currentIndex; + + public Writer(BoolCodec codec, Stream stream) + : base(codec.Factory, stream) + { + } + + public override void Write(ref bool value) + { + Contracts.Assert(0 <= _currentIndex && _currentIndex < 8); + + _numWritten++; + if (value) + _currentBits |= (byte)(1 << _currentIndex); + + _currentIndex++; + if (_currentIndex == 8) + { + Writer.Write(_currentBits); + _currentBits = 0; + _currentIndex = 0; + } + } + + // REVIEW: More efficient array writers are certainly possible. 
+ + public override long GetCommitLengthEstimate() + { + return 4 * (((_numWritten - 1) >> 4) + 1); + } + + public override void Commit() + { + if (_currentIndex > 0) + { + Writer.Write(_currentBits); + _currentBits = 0; + _currentIndex = 0; + } + } } - public override IValueReader OpenReader(Stream stream, int items) + public override IValueReader OpenReader(Stream stream, int items) { return new Reader(this, stream, items); } - private sealed class Reader : ValueReaderBase + private sealed class Reader : ValueReaderBase { private byte _currentBits; private int _currentIndex; private int _remaining; - public Reader(OldBoolCodec codec, Stream stream, int items) + public Reader(BoolCodec codec, Stream stream, int items) : base(codec.Factory, stream) { _remaining = items; @@ -474,7 +519,7 @@ public override void MoveNext() _currentBits >>= 1; } - public override void Get(ref DvBool value) + public override void Get(ref bool value) { Contracts.Assert(0 <= _currentIndex, "have not moved in"); Contracts.Assert(_currentIndex < 8); @@ -483,83 +528,34 @@ public override void Get(ref DvBool value) } } - private sealed class BoolCodec : SimpleCodec + private sealed class OldBoolCodec : SimpleCodec { // *** Binary block format *** // Pack 16 values into 32 bits, with 00 for false, 01 for true and 10 for NA. 
- public BoolCodec(CodecFactory factory) + public OldBoolCodec(CodecFactory factory) : base(factory, BoolType.Instance) { } - public override IValueWriter OpenWriter(Stream stream) + public override IValueWriter OpenWriter(Stream stream) { - return new Writer(this, stream); + Contracts.Assert(false, "This older form only supports reading"); + throw Contracts.ExceptNotSupp("Writing single bit booleans no longer supported"); } - public override IValueReader OpenReader(Stream stream, int items) + public override IValueReader OpenReader(Stream stream, int items) { return new Reader(this, stream, items); } - private sealed class Writer : ValueWriterBase - { - // Pack 16 values into 32 bits. - private int _currentBits; - private long _numWritten; - private int _currentIndex; - - public Writer(BoolCodec codec, Stream stream) - : base(codec.Factory, stream) - { - } - - public override void Write(ref DvBool value) - { - Contracts.Assert(0 <= _currentIndex && _currentIndex < 32); - Contracts.Assert((_currentIndex & 1) == 0); - - _numWritten++; - if (value.IsTrue) - _currentBits |= 1 << _currentIndex; - else if (!value.IsFalse) - _currentBits |= 2 << _currentIndex; - - _currentIndex += 2; - if (_currentIndex == 32) - { - Writer.Write(_currentBits); - _currentBits = 0; - _currentIndex = 0; - } - } - - // REVIEW: More efficient array writers are certainly possible. 
- - public override long GetCommitLengthEstimate() - { - return 4 * (((_numWritten - 1) >> 4) + 1); - } - - public override void Commit() - { - if (_currentIndex > 0) - { - Writer.Write(_currentBits); - _currentBits = 0; - _currentIndex = 0; - } - } - } - - private sealed class Reader : ValueReaderBase + private sealed class Reader : ValueReaderBase { private int _currentBits; private int _currentSlot; private int _remaining; - public Reader(BoolCodec codec, Stream stream, int items) + public Reader(OldBoolCodec codec, Stream stream, int items) : base(codec.Factory, stream) { _remaining = items; @@ -576,20 +572,20 @@ public override void MoveNext() _currentBits = (int)((uint)_currentBits >> 2); } - public override void Get(ref DvBool value) + public override void Get(ref bool value) { Contracts.Assert(0 <= _currentSlot, "have not moved in"); Contracts.Assert(_currentSlot < 16); switch (_currentBits & 0x3) { case 0x0: - value = DvBool.False; + value = false; break; case 0x1: - value = DvBool.True; + value = true; break; case 0x2: - value = DvBool.NA; + value = false; break; default: throw Contracts.ExceptDecode("Invalid bit pattern in BoolCodec"); @@ -598,24 +594,24 @@ public override void Get(ref DvBool value) } } - private sealed class DateTimeCodec : SimpleCodec + private sealed class DateTimeCodec : SimpleCodec { public DateTimeCodec(CodecFactory factory) : base(factory, DateTimeType.Instance) { } - public override IValueWriter OpenWriter(Stream stream) + public override IValueWriter OpenWriter(Stream stream) { return new Writer(this, stream); } - public override IValueReader OpenReader(Stream stream, int items) + public override IValueReader OpenReader(Stream stream, int items) { return new Reader(this, stream, items); } - private sealed class Writer : ValueWriterBase + private sealed class Writer : ValueWriterBase { private long _numWritten; @@ -624,11 +620,9 @@ public Writer(DateTimeCodec codec, Stream stream) { } - public override void Write(ref DvDateTime 
value) + public override void Write(ref DateTime value) { - var ticks = value.Ticks.RawValue; - Contracts.Assert(ticks == DvInt8.RawNA || (ulong)ticks <= DvDateTime.MaxTicks); - Writer.Write(ticks); + Writer.Write(value.Ticks); _numWritten++; } @@ -639,14 +633,14 @@ public override void Commit() public override long GetCommitLengthEstimate() { - return _numWritten * sizeof(Int64); + return _numWritten * sizeof(long); } } - private sealed class Reader : ValueReaderBase + private sealed class Reader : ValueReaderBase { private int _remaining; - private DvDateTime _value; + private DateTime _value; public Reader(DateTimeCodec codec, Stream stream, int items) : base(codec.Factory, stream) @@ -657,74 +651,64 @@ public Reader(DateTimeCodec codec, Stream stream, int items) public override void MoveNext() { Contracts.Assert(_remaining > 0, "already consumed all values"); - var value = Reader.ReadInt64(); - Contracts.CheckDecode(value == DvInt8.RawNA || (ulong)value <= DvDateTime.MaxTicks); - _value = new DvDateTime(value); + + var ticks = Reader.ReadInt64(); + _value = new DateTime(ticks == long.MinValue ? 
default : ticks); _remaining--; } - public override void Get(ref DvDateTime value) + public override void Get(ref DateTime value) { value = _value; } } } - private sealed class DateTimeZoneCodec : SimpleCodec + private sealed class DateTimeOffsetCodec : SimpleCodec { - private readonly MadeObjectPool _shortBufferPool; private readonly MadeObjectPool _longBufferPool; + private readonly MadeObjectPool _shortBufferPool; - public DateTimeZoneCodec(CodecFactory factory) - : base(factory, DateTimeZoneType.Instance) + public DateTimeOffsetCodec(CodecFactory factory) + : base(factory, DateTimeOffsetType.Instance) { - _shortBufferPool = new MadeObjectPool(() => null); _longBufferPool = new MadeObjectPool(() => null); + _shortBufferPool = new MadeObjectPool(() => null); } - public override IValueWriter OpenWriter(Stream stream) + public override IValueWriter OpenWriter(Stream stream) { return new Writer(this, stream); } - public override IValueReader OpenReader(Stream stream, int items) + public override IValueReader OpenReader(Stream stream, int items) { return new Reader(this, stream, items); } - private sealed class Writer : ValueWriterBase + private sealed class Writer : ValueWriterBase { private List _offsets; private List _ticks; - public Writer(DateTimeZoneCodec codec, Stream stream) + public Writer(DateTimeOffsetCodec codec, Stream stream) : base(codec.Factory, stream) { _offsets = new List(); _ticks = new List(); } - public override void Write(ref DvDateTimeZone value) + public override void Write(ref DateTimeOffset value) { Contracts.Assert(_offsets != null, "writer was already committed"); - var ticks = value.ClockDateTime.Ticks; - var offset = value.OffsetMinutes; + _ticks.Add(value.DateTime.Ticks); - _ticks.Add(ticks.RawValue); - if (ticks.IsNA) - { - Contracts.Assert(offset.IsNA); - _offsets.Add(0); - } - else - { - Contracts.Assert( - offset.RawValue >= DvDateTimeZone.MinMinutesOffset && - offset.RawValue <= DvDateTimeZone.MaxMinutesOffset); - 
Contracts.Assert(0 <= ticks.RawValue && ticks.RawValue <= DvDateTime.MaxTicks); - _offsets.Add(offset.RawValue); - } + //DateTimeOffset exposes its offset as a TimeSpan, but internally it uses short and in minutes. + //https://github.com/dotnet/coreclr/blob/9499b08eefd895158c3f3c7834e185a73619128d/src/System.Private.CoreLib/shared/System/DateTimeOffset.cs#L51-L53 + //https://github.com/dotnet/coreclr/blob/9499b08eefd895158c3f3c7834e185a73619128d/src/System.Private.CoreLib/shared/System/DateTimeOffset.cs#L286-L292 + //From everything online(ISO8601, RFC3339, SQL Server doc, the offset supports the range -14 to 14 hours, and only supports minute precision. + _offsets.Add((short)(value.Offset.TotalMinutes)); } public override void Commit() @@ -740,13 +724,13 @@ public override void Commit() public override long GetCommitLengthEstimate() { - return (long)_offsets.Count * (sizeof(Int64) + sizeof(Int16)); + return (long)_offsets.Count * (sizeof(long) + sizeof(short)); } } - private sealed class Reader : ValueReaderBase + private sealed class Reader : ValueReaderBase { - private readonly DateTimeZoneCodec _codec; + private readonly DateTimeOffsetCodec _codec; private readonly int _entries; private short[] _offsets; @@ -754,7 +738,7 @@ private sealed class Reader : ValueReaderBase private int _index; private bool _disposed; - public Reader(DateTimeZoneCodec codec, Stream stream, int items) + public Reader(DateTimeOffsetCodec codec, Stream stream, int items) : base(codec.Factory, stream) { _codec = codec; @@ -764,17 +748,12 @@ public Reader(DateTimeZoneCodec codec, Stream stream, int items) _offsets = _codec._shortBufferPool.Get(); Utils.EnsureSize(ref _offsets, _entries, false); for (int i = 0; i < _entries; i++) - { _offsets[i] = Reader.ReadInt16(); - Contracts.CheckDecode(DvDateTimeZone.MinMinutesOffset <= _offsets[i] && _offsets[i] <= DvDateTimeZone.MaxMinutesOffset); - } + _ticks = _codec._longBufferPool.Get(); Utils.EnsureSize(ref _ticks, _entries, false); for (int i 
= 0; i < _entries; i++) - { _ticks[i] = Reader.ReadInt64(); - Contracts.CheckDecode(_ticks[i] == DvInt8.RawNA || (ulong)_ticks[i] <= DvDateTime.MaxTicks); - } } public override void MoveNext() @@ -783,10 +762,12 @@ public override void MoveNext() Contracts.Check(++_index < _entries, "reader already read all values"); } - public override void Get(ref DvDateTimeZone value) + public override void Get(ref DateTimeOffset value) { Contracts.Assert(!_disposed); - value = new DvDateTimeZone(_ticks[_index], _offsets[_index]); + var ticks = _ticks[_index]; + var offset = _offsets[_index]; + value = new DateTimeOffset(new DateTime(ticks == long.MinValue ? default : ticks), new TimeSpan(0, offset == short.MinValue ? default : offset, 0)); } public override void Dispose() diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Header.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Header.cs index 36186cf7af..b552ab6523 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Header.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/Header.cs @@ -34,8 +34,9 @@ public struct Header //public const ulong WriterVersion = 0x0001000100010002; // Codec changes. //public const ulong WriterVersion = 0x0001000100010003; // Slot names. //public const ulong WriterVersion = 0x0001000100010004; // Column metadata. - public const ulong WriterVersion = 0x0001000100010005; // "NA" DvText support. - public const ulong CanBeReadByVersion = 0x0001000100010005; + //public const ulong WriterVersion = 0x0001000100010005; // "NA" DvText support. + public const ulong WriterVersion = 0x0001000100010006; // Replace DvTypes with .NET Standard data types. 
+ public const ulong CanBeReadByVersion = 0x0001000100010006; internal static string VersionToString(ulong v) { diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/UnsafeTypeOps.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/UnsafeTypeOps.cs index 026228d6be..b63f361fe2 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/UnsafeTypeOps.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/UnsafeTypeOps.cs @@ -32,21 +32,17 @@ internal static class UnsafeTypeOpsFactory static UnsafeTypeOpsFactory() { _type2ops = new Dictionary(); - _type2ops[typeof(SByte)] = new SByteUnsafeTypeOps(); - _type2ops[typeof(DvInt1)] = new DvI1UnsafeTypeOps(); + _type2ops[typeof(sbyte)] = new SByteUnsafeTypeOps(); _type2ops[typeof(Byte)] = new ByteUnsafeTypeOps(); - _type2ops[typeof(Int16)] = new Int16UnsafeTypeOps(); - _type2ops[typeof(DvInt2)] = new DvI2UnsafeTypeOps(); + _type2ops[typeof(short)] = new Int16UnsafeTypeOps(); _type2ops[typeof(UInt16)] = new UInt16UnsafeTypeOps(); - _type2ops[typeof(Int32)] = new Int32UnsafeTypeOps(); - _type2ops[typeof(DvInt4)] = new DvI4UnsafeTypeOps(); + _type2ops[typeof(int)] = new Int32UnsafeTypeOps(); _type2ops[typeof(UInt32)] = new UInt32UnsafeTypeOps(); - _type2ops[typeof(Int64)] = new Int64UnsafeTypeOps(); - _type2ops[typeof(DvInt8)] = new DvI8UnsafeTypeOps(); + _type2ops[typeof(long)] = new Int64UnsafeTypeOps(); _type2ops[typeof(UInt64)] = new UInt64UnsafeTypeOps(); _type2ops[typeof(Single)] = new SingleUnsafeTypeOps(); _type2ops[typeof(Double)] = new DoubleUnsafeTypeOps(); - _type2ops[typeof(DvTimeSpan)] = new DvTimeSpanUnsafeTypeOps(); + _type2ops[typeof(TimeSpan)] = new TimeSpanUnsafeTypeOps(); _type2ops[typeof(UInt128)] = new UgUnsafeTypeOps(); } @@ -55,29 +51,16 @@ public static UnsafeTypeOps Get() return (UnsafeTypeOps)_type2ops[typeof(T)]; } - private sealed class SByteUnsafeTypeOps : UnsafeTypeOps + private sealed class SByteUnsafeTypeOps : UnsafeTypeOps { - public override int Size { get { return sizeof(SByte); } } - public override 
unsafe void Apply(SByte[] array, Action func) + public override int Size { get { return sizeof(sbyte); } } + public override unsafe void Apply(sbyte[] array, Action func) { - fixed (SByte* pArray = array) + fixed (sbyte* pArray = array) func(new IntPtr(pArray)); } - public override void Write(SByte a, BinaryWriter writer) { writer.Write(a); } - public override SByte Read(BinaryReader reader) { return reader.ReadSByte(); } - } - - private sealed class DvI1UnsafeTypeOps : UnsafeTypeOps - { - public override int Size { get { return sizeof(SByte); } } - public override unsafe void Apply(DvInt1[] array, Action func) - { - fixed (DvInt1* pArray = array) - func(new IntPtr(pArray)); - } - - public override void Write(DvInt1 a, BinaryWriter writer) { writer.Write(a.RawValue); } - public override DvInt1 Read(BinaryReader reader) { return reader.ReadSByte(); } + public override void Write(sbyte a, BinaryWriter writer) { writer.Write(a); } + public override sbyte Read(BinaryReader reader) { return reader.ReadSByte(); } } private sealed class ByteUnsafeTypeOps : UnsafeTypeOps @@ -92,29 +75,16 @@ public override unsafe void Apply(Byte[] array, Action func) public override Byte Read(BinaryReader reader) { return reader.ReadByte(); } } - private sealed class Int16UnsafeTypeOps : UnsafeTypeOps + private sealed class Int16UnsafeTypeOps : UnsafeTypeOps { - public override int Size { get { return sizeof(Int16); } } - public override unsafe void Apply(Int16[] array, Action func) + public override int Size { get { return sizeof(short); } } + public override unsafe void Apply(short[] array, Action func) { - fixed (Int16* pArray = array) + fixed (short* pArray = array) func(new IntPtr(pArray)); } - public override void Write(Int16 a, BinaryWriter writer) { writer.Write(a); } - public override Int16 Read(BinaryReader reader) { return reader.ReadInt16(); } - } - - private sealed class DvI2UnsafeTypeOps : UnsafeTypeOps - { - public override int Size { get { return sizeof(Int16); } } - public 
override unsafe void Apply(DvInt2[] array, Action func) - { - fixed (DvInt2* pArray = array) - func(new IntPtr(pArray)); - } - - public override void Write(DvInt2 a, BinaryWriter writer) { writer.Write(a.RawValue); } - public override DvInt2 Read(BinaryReader reader) { return reader.ReadInt16(); } + public override void Write(short a, BinaryWriter writer) { writer.Write(a); } + public override short Read(BinaryReader reader) { return reader.ReadInt16(); } } private sealed class UInt16UnsafeTypeOps : UnsafeTypeOps @@ -129,29 +99,16 @@ public override unsafe void Apply(UInt16[] array, Action func) public override UInt16 Read(BinaryReader reader) { return reader.ReadUInt16(); } } - private sealed class Int32UnsafeTypeOps : UnsafeTypeOps - { - public override int Size { get { return sizeof(Int32); } } - public override unsafe void Apply(Int32[] array, Action func) - { - fixed (Int32* pArray = array) - func(new IntPtr(pArray)); - } - public override void Write(Int32 a, BinaryWriter writer) { writer.Write(a); } - public override Int32 Read(BinaryReader reader) { return reader.ReadInt32(); } - } - - private sealed class DvI4UnsafeTypeOps : UnsafeTypeOps + private sealed class Int32UnsafeTypeOps : UnsafeTypeOps { - public override int Size { get { return sizeof(Int32); } } - public override unsafe void Apply(DvInt4[] array, Action func) + public override int Size { get { return sizeof(int); } } + public override unsafe void Apply(int[] array, Action func) { - fixed (DvInt4* pArray = array) + fixed (int* pArray = array) func(new IntPtr(pArray)); } - - public override void Write(DvInt4 a, BinaryWriter writer) { writer.Write(a.RawValue); } - public override DvInt4 Read(BinaryReader reader) { return reader.ReadInt32(); } + public override void Write(int a, BinaryWriter writer) { writer.Write(a); } + public override int Read(BinaryReader reader) { return reader.ReadInt32(); } } private sealed class UInt32UnsafeTypeOps : UnsafeTypeOps @@ -166,29 +123,16 @@ public override unsafe 
void Apply(UInt32[] array, Action func) public override UInt32 Read(BinaryReader reader) { return reader.ReadUInt32(); } } - private sealed class Int64UnsafeTypeOps : UnsafeTypeOps + private sealed class Int64UnsafeTypeOps : UnsafeTypeOps { - public override int Size { get { return sizeof(Int64); } } - public override unsafe void Apply(Int64[] array, Action func) + public override int Size { get { return sizeof(long); } } + public override unsafe void Apply(long[] array, Action func) { - fixed (Int64* pArray = array) + fixed (long* pArray = array) func(new IntPtr(pArray)); } - public override void Write(Int64 a, BinaryWriter writer) { writer.Write(a); } - public override Int64 Read(BinaryReader reader) { return reader.ReadInt64(); } - } - - private sealed class DvI8UnsafeTypeOps : UnsafeTypeOps - { - public override int Size { get { return sizeof(Int64); } } - public override unsafe void Apply(DvInt8[] array, Action func) - { - fixed (DvInt8* pArray = array) - func(new IntPtr(pArray)); - } - - public override void Write(DvInt8 a, BinaryWriter writer) { writer.Write(a.RawValue); } - public override DvInt8 Read(BinaryReader reader) { return reader.ReadInt64(); } + public override void Write(long a, BinaryWriter writer) { writer.Write(a); } + public override long Read(BinaryReader reader) { return reader.ReadInt64(); } } private sealed class UInt64UnsafeTypeOps : UnsafeTypeOps @@ -227,17 +171,21 @@ public override unsafe void Apply(Double[] array, Action func) public override Double Read(BinaryReader reader) { return reader.ReadDouble(); } } - private sealed class DvTimeSpanUnsafeTypeOps : UnsafeTypeOps + private sealed class TimeSpanUnsafeTypeOps : UnsafeTypeOps { - public override int Size { get { return sizeof(Int64); } } - public override unsafe void Apply(DvTimeSpan[] array, Action func) + public override int Size { get { return sizeof(long); } } + public override unsafe void Apply(TimeSpan[] array, Action func) { - fixed (DvTimeSpan* pArray = array) + fixed 
(TimeSpan* pArray = array) func(new IntPtr(pArray)); } - public override void Write(DvTimeSpan a, BinaryWriter writer) { writer.Write(a.Ticks.RawValue); } - public override DvTimeSpan Read(BinaryReader reader) { return new DvTimeSpan(reader.ReadInt64()); } + public override void Write(TimeSpan a, BinaryWriter writer) { writer.Write(a.Ticks); } + public override TimeSpan Read(BinaryReader reader) + { + var ticks = reader.ReadInt64(); + return new TimeSpan(ticks == long.MinValue ? default : ticks); + } } private sealed class UgUnsafeTypeOps : UnsafeTypeOps diff --git a/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs index d1b43dcf65..27ac28c717 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs @@ -368,7 +368,7 @@ private sealed class Cursor : RootCursorBase, IRowCursor private Delegate[] _getters; private Delegate[] _subGetters; // Cached getters of the sub-cursor. - private DvText[] _colValues; // Column values cached from the file path. + private ReadOnlyMemory[] _colValues; // Column values cached from the file path. private IRowCursor _subCursor; // Sub cursor of the current file. 
private IEnumerator _fileOrder; @@ -384,7 +384,7 @@ public Cursor(IChannelProvider provider, PartitionedFileLoader parent, IMultiStr _active = Utils.BuildArray(Schema.ColumnCount, predicate); _subActive = _active.Take(SubColumnCount).ToArray(); - _colValues = new DvText[Schema.ColumnCount - SubColumnCount]; + _colValues = new ReadOnlyMemory[Schema.ColumnCount - SubColumnCount]; _subGetters = new Delegate[SubColumnCount]; _getters = CreateGetters(); @@ -537,13 +537,13 @@ private void UpdateColumnValues(string path, List values) var source = _parent._srcDirIndex[i]; if (source >= 0 && source < values.Count) { - _colValues[i] = new DvText(values[source]); + _colValues[i] = values[source].AsMemory(); } else if (source == FilePathColIndex) { // Force Unix path for consistency. var cleanPath = path.Replace(@"\", @"/"); - _colValues[i] = new DvText(cleanPath); + _colValues[i] = cleanPath.AsMemory(); } } } @@ -602,7 +602,7 @@ private ValueGetter GetterDelegateCore(int col, ColumnType type) Ch.Check(col >= 0 && col < _colValues.Length); Ch.AssertValue(type); - var conv = Conversions.Instance.GetStandardConversion(TextType.Instance, type) as ValueMapper; + var conv = Conversions.Instance.GetStandardConversion(TextType.Instance, type) as ValueMapper, TValue>; if (conv == null) { throw Ch.Except("Invalid TValue: '{0}' of the conversion.", typeof(TValue)); diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index 391fb65739..3663c93cc4 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -511,12 +511,12 @@ private sealed class Bindings : ISchema { public readonly ColInfo[] Infos; public readonly Dictionary NameToInfoIndex; - private readonly VBuffer[] _slotNames; + private readonly VBuffer>[] _slotNames; // Empty iff either header+ not set in args, or if no header present, or upon load // there was no header stored in the model. 
- private readonly DvText _header; + private readonly ReadOnlyMemory _header; - private readonly MetadataUtils.MetadataGetter> _getSlotNames; + private readonly MetadataUtils.MetadataGetter>> _getSlotNames; private Bindings() { @@ -546,7 +546,7 @@ public Bindings(TextLoader parent, Column[] cols, IMultiStreamSource headerFile, int inputSize = parent._inputSize; ch.Assert(0 <= inputSize & inputSize < SrcLim); - List lines = null; + List> lines = null; if (headerFile != null) Cursor.GetSomeLines(headerFile, 1, ref lines); if (needInputSize && inputSize == 0) @@ -712,11 +712,11 @@ public Bindings(TextLoader parent, Column[] cols, IMultiStreamSource headerFile, Infos[iinfoOther] = ColInfo.Create(cols[iinfoOther].Name.Trim(), typeOther, segsNew.ToArray(), true); } - _slotNames = new VBuffer[Infos.Length]; + _slotNames = new VBuffer>[Infos.Length]; if ((parent.HasHeader || headerFile != null) && Utils.Size(lines) > 0) _header = lines[0]; - if (_header.HasChars) + if (!_header.IsEmpty) Parser.ParseSlotNames(parent, _header, Infos, _slotNames); ch.Done(); @@ -797,12 +797,12 @@ public Bindings(ModelLoadContext ctx, TextLoader parent) NameToInfoIndex[name] = iinfo; } - _slotNames = new VBuffer[Infos.Length]; + _slotNames = new VBuffer>[Infos.Length]; string result = null; ctx.TryLoadTextStream("Header.txt", reader => result = reader.ReadLine()); if (!string.IsNullOrEmpty(result)) - Parser.ParseSlotNames(parent, _header = new DvText(result), Infos, _slotNames); + Parser.ParseSlotNames(parent, _header = result.AsMemory(), Infos, _slotNames); } public void Save(ModelSaveContext ctx) @@ -850,7 +850,7 @@ public void Save(ModelSaveContext ctx) } // Save header in an easily human inspectable separate entry. 
- if (_header.HasChars) + if (!_header.IsEmpty) ctx.SaveTextStream("Header.txt", writer => writer.WriteLine(_header.ToString())); } @@ -924,7 +924,7 @@ public void GetMetadata(string kind, int col, ref TValue value) } } - private void GetSlotNames(int col, ref VBuffer dst) + private void GetSlotNames(int col, ref VBuffer> dst) { Contracts.Assert(0 <= col && col < ColumnCount); diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderCursor.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderCursor.cs index 19b6d640cc..b23637b1a9 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderCursor.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderCursor.cs @@ -212,7 +212,7 @@ public override ValueGetter GetIdGetter() }; } - public static void GetSomeLines(IMultiStreamSource source, int count, ref List lines) + public static void GetSomeLines(IMultiStreamSource source, int count, ref List> lines) { Contracts.AssertValue(source); Contracts.Assert(count > 0); @@ -236,7 +236,7 @@ public static void GetSomeLines(IMultiStreamSource source, int count, ref List @@ -495,7 +495,7 @@ private void ThreadProc() for (; ; ) { // REVIEW: Avoid allocating a string for every line. This would probably require - // introducing a CharSpan type (similar to DvText but based on char[] or StringBuilder) + // introducing a CharSpan type (similar to ReadOnlyMemory but based on char[] or StringBuilder) // and implementing all the necessary conversion functionality on it. See task 3871. 
text = rdr.ReadLine(); if (text == null) diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs index c0d0f25b17..0d4331c59b 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs @@ -228,7 +228,7 @@ protected ColumnPipe(RowSet rows) public abstract void Reset(int irow, int size); // Passed by-ref for effeciency, not so it can be modified. - public abstract bool Consume(int irow, int index, ref DvText text); + public abstract bool Consume(int irow, int index, ref ReadOnlyMemory text); public abstract Delegate GetGetter(); } @@ -255,7 +255,7 @@ public override void Reset(int irow, int size) _values[irow] = default(TResult); } - public override bool Consume(int irow, int index, ref DvText text) + public override bool Consume(int irow, int index, ref ReadOnlyMemory text) { Contracts.Assert(0 <= irow && irow < _values.Length); Contracts.Assert(index == 0); @@ -332,7 +332,7 @@ public void Reset(int size) AssertValid(); } - public bool Consume(int index, ref DvText text) + public bool Consume(int index, ref ReadOnlyMemory text) { AssertValid(); Contracts.Assert(_indexPrev < index & index < _size); @@ -439,7 +439,7 @@ public override void Reset(int irow, int size) _values[irow].Reset(size); } - public override bool Consume(int irow, int index, ref DvText text) + public override bool Consume(int irow, int index, ref ReadOnlyMemory text) { Contracts.Assert(0 <= irow && irow < _values.Length); return _values[irow].Consume(index, ref text); @@ -510,7 +510,7 @@ private struct ScanInfo /// /// The current text for the entire line (all fields), and possibly more. /// - public readonly string TextBuf; + public ReadOnlyMemory TextBuf; /// /// The min position in to consider (all fields). @@ -531,7 +531,7 @@ private struct ScanInfo /// /// The (unquoted) text of the field. 
/// - public DvText Span; + public ReadOnlyMemory Span; /// /// Whether there was a quoting error in the field. @@ -558,16 +558,17 @@ private struct ScanInfo /// /// Initializes the ScanInfo. /// - public ScanInfo(ref DvText text, string path, long line) + public ScanInfo(ref ReadOnlyMemory text, string path, long line) : this() { - Contracts.Assert(!text.IsNA); Contracts.AssertValueOrNull(path); Contracts.Assert(line >= 0); Path = path; Line = line; - TextBuf = text.GetRawUnderlyingBufferInfo(out IchMinBuf, out IchLimBuf); + TextBuf = text; + IchMinBuf = 0; + IchLimBuf = text.Length; IchMinNext = IchMinBuf; } } @@ -584,13 +585,13 @@ private sealed class FieldSet // Source indices and associated text (parallel arrays). public int[] Indices; - public DvText[] Spans; + public ReadOnlyMemory[] Spans; public FieldSet() { // Always allocate/size Columns after Spans so even if exceptions are thrown we // are guaranteed that Spans.Length >= Columns.Length. - Spans = new DvText[8]; + Spans = new ReadOnlyMemory[8]; Indices = new int[8]; } @@ -687,7 +688,7 @@ public Parser(TextLoader parent) Contracts.Assert(_inputSize >= 0); } - public static void GetInputSize(TextLoader parent, List lines, out int minSize, out int maxSize) + public static void GetInputSize(TextLoader parent, List> lines, out int minSize, out int maxSize) { Contracts.AssertNonEmpty(lines); Contracts.Assert(parent._inputSize == 0, "Why is this being called when inputSize is known?"); @@ -700,12 +701,12 @@ public static void GetInputSize(TextLoader parent, List lines, out int m { foreach (var line in lines) { - var text = (parent._flags & Options.TrimWhitespace) != 0 ? line.TrimEndWhiteSpace() : line; - if (!text.HasChars) + var text = (parent._flags & Options.TrimWhitespace) != 0 ? ReadOnlyMemoryUtils.TrimEndWhiteSpace(line) : line; + if (text.IsEmpty) continue; // REVIEW: This is doing more work than we need, but makes sure we're consistent.... 
- int srcLim = impl.GatherFields(text); + int srcLim = impl.GatherFields(text, text.Span); // Don't need the fields, just srcLim. impl.Fields.Clear(); @@ -724,9 +725,9 @@ public static void GetInputSize(TextLoader parent, List lines, out int m } } - public static void ParseSlotNames(TextLoader parent, DvText textHeader, ColInfo[] infos, VBuffer[] slotNames) + public static void ParseSlotNames(TextLoader parent, ReadOnlyMemory textHeader, ColInfo[] infos, VBuffer>[] slotNames) { - Contracts.Assert(textHeader.HasChars); + Contracts.Assert(!textHeader.IsEmpty); Contracts.Assert(infos.Length == slotNames.Length); var sb = new StringBuilder(); @@ -734,7 +735,7 @@ public static void ParseSlotNames(TextLoader parent, DvText textHeader, ColInfo[ var impl = new HelperImpl(stats, parent._flags, parent._separators, parent._inputSize, int.MaxValue); try { - impl.GatherFields(textHeader); + impl.GatherFields(textHeader, textHeader.Span); } finally { @@ -742,7 +743,7 @@ public static void ParseSlotNames(TextLoader parent, DvText textHeader, ColInfo[ } var header = impl.Fields; - var bldr = BufferBuilder.CreateDefault(); + var bldr = BufferBuilder>.CreateDefault(); for (int iinfo = 0; iinfo < infos.Length; iinfo++) { var info = infos[iinfo]; @@ -771,7 +772,7 @@ public static void ParseSlotNames(TextLoader parent, DvText textHeader, ColInfo[ { var srcCur = header.Indices[isrc]; Contracts.Assert(min <= srcCur & srcCur < lim); - bldr.AddFeature(indexBase + srcCur, header.Spans[isrc].TrimWhiteSpace()); + bldr.AddFeature(indexBase + srcCur, ReadOnlyMemoryUtils.TrimWhiteSpace(header.Spans[isrc])); } } ivDst += sizeSeg; @@ -795,6 +796,24 @@ public RowSet CreateRowSet(ParseStats stats, int count, bool[] active) return rows; } + /// + /// Returns a of with trailing whitespace trimmed. 
+ /// + private ReadOnlyMemory TrimEndWhiteSpace(ReadOnlyMemory memory, ReadOnlySpan span) + { + if (memory.IsEmpty) + return memory; + + int ichLim = memory.Length; + if (!char.IsWhiteSpace(span[ichLim - 1])) + return memory; + + while (0 < ichLim && char.IsWhiteSpace(span[ichLim - 1])) + ichLim--; + + return memory.Slice(0, ichLim); + } + public void ParseRow(RowSet rows, int irow, Helper helper, bool[] active, string path, long line, string text) { Contracts.AssertValue(rows); @@ -803,13 +822,14 @@ public void ParseRow(RowSet rows, int irow, Helper helper, bool[] active, string Contracts.Assert(active == null | Utils.Size(active) == _infos.Length); var impl = (HelperImpl)helper; - DvText lineSpan = new DvText(text); + var lineSpan = text.AsMemory(); + var span = lineSpan.Span; if ((_flags & Options.TrimWhitespace) != 0) - lineSpan = lineSpan.TrimEndWhiteSpace(); + lineSpan = TrimEndWhiteSpace(lineSpan, span); try { // Parse the spans into items, ensuring that sparse don't precede non-sparse. - int srcLim = impl.GatherFields(lineSpan, path, line); + int srcLim = impl.GatherFields(lineSpan, span, path, line); impl.Fields.AssertValid(); // REVIEW: When should we report inconsistency? @@ -855,7 +875,7 @@ private sealed class HelperImpl : Helper private readonly StringBuilder _sb; // Result of a blank field - either Missing or Empty, depending on _quoting. - private readonly DvText _blank; + private readonly ReadOnlyMemory _blank; public readonly FieldSet Fields; @@ -878,7 +898,7 @@ public HelperImpl(ParseStats stats, Options flags, char[] seps, int inputSize, i _quoting = (flags & Options.AllowQuoting) != 0; _sparse = (flags & Options.AllowSparse) != 0; _sb = new StringBuilder(); - _blank = _quoting ? DvText.NA : DvText.Empty; + _blank = ReadOnlyMemory.Empty; Fields = new FieldSet(); } @@ -902,7 +922,7 @@ private bool IsSep(char ch) /// Process the line of text into fields, stored in the Fields field. Ensures that sparse /// don't precede non-sparse. 
Returns the lim of the src columns. /// - public int GatherFields(DvText lineSpan, string path = null, long line = 0) + public int GatherFields(ReadOnlyMemory lineSpan, ReadOnlySpan span, string path = null, long line = 0) { Fields.AssertEmpty(); @@ -915,7 +935,7 @@ public int GatherFields(DvText lineSpan, string path = null, long line = 0) for (; ; ) { Contracts.Assert(scan.IchMinBuf <= scan.IchMinNext && scan.IchMinNext <= scan.IchLimBuf); - bool more = FetchNextField(ref scan); + bool more = FetchNextField(ref scan, span); Contracts.Assert(scan.IchMinBuf <= scan.IchMinNext && scan.IchMinNext <= scan.IchLimBuf); Contracts.Assert(scan.Index == -1); @@ -946,7 +966,7 @@ public int GatherFields(DvText lineSpan, string path = null, long line = 0) for (; ; ) { Contracts.Assert(scan.IchMinBuf <= scan.IchMinNext && scan.IchMinNext <= scan.IchLimBuf); - bool more = FetchNextField(ref scan); + bool more = FetchNextField(ref scan, span); Contracts.Assert(scan.IchMinBuf <= scan.IchMinNext && scan.IchMinNext <= scan.IchLimBuf); Contracts.Assert(scan.Index >= -1); @@ -992,16 +1012,24 @@ public int GatherFields(DvText lineSpan, string path = null, long line = 0) } var spanT = Fields.Spans[Fields.Count - 1]; - // Note that Convert produces NA if the text is unparsable. - DvInt4 csrc = default(DvInt4); - Conversion.Conversions.Instance.Convert(ref spanT, ref csrc); - csrcSparse = csrc.RawValue; - if (csrcSparse <= 0) + // Note that Convert throws exception the text is unparsable. + int csrc = default; + try + { + Conversions.Instance.Convert(ref spanT, ref csrc); + } + catch + { + Contracts.Assert(csrc == default); + } + + if (csrc <= 0) { _stats.LogBadFmt(ref scan, "Bad dimensionality or ambiguous sparse item. 
Use sparse=- for non-sparse file, and/or quote the value."); break; } + csrcSparse = csrc; srcLimFixed = Fields.Indices[--Fields.Count]; if (csrcSparse >= SrcLim - srcLimFixed) csrcSparse = SrcLim - srcLimFixed - 1; @@ -1065,18 +1093,17 @@ public int GatherFields(DvText lineSpan, string path = null, long line = 0) return inputSize; } - private bool FetchNextField(ref ScanInfo scan) + private bool FetchNextField(ref ScanInfo scan, ReadOnlySpan span) { Contracts.Assert(scan.IchMinBuf <= scan.IchMinNext && scan.IchMinNext <= scan.IchLimBuf); var text = scan.TextBuf; int ichLim = scan.IchLimBuf; int ichCur = scan.IchMinNext; - if (!_sepContainsSpace) { // Ignore leading spaces - while (ichCur < ichLim && text[ichCur] == ' ') + while (ichCur < ichLim && span[ichCur] == ' ') ichCur++; } @@ -1093,29 +1120,29 @@ private bool FetchNextField(ref ScanInfo scan) } int ichMinRaw = ichCur; - if (_sparse && (uint)(text[ichCur] - '0') <= 9) + if (_sparse && (uint)(span[ichCur] - '0') <= 9) { // See if it is sparse. Avoid overflow by limiting the index to 9 digits. // REVIEW: This limits the src index to a billion. Is this acceptable? int ichEnd = Math.Min(ichLim, ichCur + 9); int ichCol = ichCur + 1; Contracts.Assert(ichCol <= ichEnd); - while (ichCol < ichEnd && (uint)(text[ichCol] - '0') <= 9) + while (ichCol < ichEnd && (uint)(span[ichCol] - '0') <= 9) ichCol++; - if (ichCol < ichLim && text[ichCol] == ':') + if (ichCol < ichLim && span[ichCol] == ':') { // It is sparse. Compute the index. int ind = 0; for (int ich = ichCur; ich < ichCol; ich++) - ind = ind * 10 + (text[ich] - '0'); + ind = ind * 10 + (span[ich] - '0'); ichCur = ichCol + 1; scan.Index = ind; // Skip spaces again. 
if (!_sepContainsSpace) { - while (ichCur < ichLim && text[ichCur] == ' ') + while (ichCur < ichLim && span[ichCur] == ' ') ichCur++; } @@ -1129,7 +1156,7 @@ private bool FetchNextField(ref ScanInfo scan) } Contracts.Assert(ichCur < ichLim); - if (text[ichCur] == '"' && _quoting) + if (span[ichCur] == '"' && _quoting) { // Quoted case. ichCur++; @@ -1144,13 +1171,13 @@ private bool FetchNextField(ref ScanInfo scan) scan.QuotingError = true; break; } - if (text[ichCur] == '"') + if (span[ichCur] == '"') { if (ichCur > ichRun) - _sb.Append(text, ichRun, ichCur - ichRun); + _sb.AppendSpan(span.Slice(ichRun, ichCur - ichRun)); if (++ichCur >= ichLim) break; - if (text[ichCur] != '"') + if (span[ichCur] != '"') break; ichRun = ichCur; } @@ -1159,7 +1186,7 @@ private bool FetchNextField(ref ScanInfo scan) // Ignore any spaces between here and the next separator. Anything else is a formatting "error". for (; ichCur < ichLim; ichCur++) { - if (text[ichCur] == ' ') + if (span[ichCur] == ' ') { // End the loop if space is a sep, otherwise ignore this space. if (_sepContainsSpace) @@ -1168,18 +1195,16 @@ private bool FetchNextField(ref ScanInfo scan) else { // End the loop if this nonspace char is a sep, otherwise it is an error. 
- if (IsSep(text[ichCur])) + if (IsSep(span[ichCur])) break; scan.QuotingError = true; } } - if (scan.QuotingError) - scan.Span = DvText.NA; - else if (_sb.Length == 0) - scan.Span = DvText.Empty; + if (scan.QuotingError || _sb.Length == 0) + scan.Span = String.Empty.AsMemory(); else - scan.Span = new DvText(_sb.ToString()); + scan.Span = _sb.ToString().AsMemory(); } else { @@ -1193,7 +1218,7 @@ private bool FetchNextField(ref ScanInfo scan) Contracts.Assert(ichCur <= ichLim); if (ichCur >= ichLim) break; - if (_sep0 == text[ichCur]) + if (_sep0 == span[ichCur]) break; } } @@ -1204,7 +1229,7 @@ private bool FetchNextField(ref ScanInfo scan) Contracts.Assert(ichCur <= ichLim); if (ichCur >= ichLim) break; - if (_sep0 == text[ichCur] || _sep1 == text[ichCur]) + if (_sep0 == span[ichCur] || _sep1 == span[ichCur]) break; } } @@ -1215,7 +1240,7 @@ private bool FetchNextField(ref ScanInfo scan) Contracts.Assert(ichCur <= ichLim); if (ichCur >= ichLim) break; - if (IsSep(text[ichCur])) + if (IsSep(span[ichCur])) break; } } @@ -1223,7 +1248,7 @@ private bool FetchNextField(ref ScanInfo scan) if (ichMin >= ichCur) scan.Span = _blank; else - scan.Span = new DvText(text, ichMin, ichCur); + scan.Span = text.Slice(ichMin, ichCur - ichMin); } scan.IchLim = ichCur; @@ -1233,7 +1258,7 @@ private bool FetchNextField(ref ScanInfo scan) return false; } - Contracts.Assert(_seps.Contains(text[ichCur])); + Contracts.Assert(_seps.Contains(span[ichCur])); scan.IchMinNext = ichCur + 1; return true; } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs index 48f3f9ddc3..64cf7f7faf 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs @@ -94,22 +94,22 @@ protected ValueWriterBase(PrimitiveType type, int source, char sep) if (type.IsText) { // For text we need to deal with escaping. 
- ValueMapper c = MapText; + ValueMapper, StringBuilder> c = MapText; Conv = (ValueMapper)(Delegate)c; } else if (type.IsTimeSpan) { - ValueMapper c = MapTimeSpan; + ValueMapper c = MapTimeSpan; Conv = (ValueMapper)(Delegate)c; } else if (type.IsDateTime) { - ValueMapper c = MapDateTime; + ValueMapper c = MapDateTime; Conv = (ValueMapper)(Delegate)c; } else if (type.IsDateTimeZone) { - ValueMapper c = MapDateTimeZone; + ValueMapper c = MapDateTimeZone; Conv = (ValueMapper)(Delegate)c; } else @@ -120,22 +120,22 @@ protected ValueWriterBase(PrimitiveType type, int source, char sep) Default = Sb.ToString(); } - protected void MapText(ref DvText src, ref StringBuilder sb) + protected void MapText(ref ReadOnlyMemory src, ref StringBuilder sb) { - TextSaverUtils.MapText(ref src, ref sb, Sep); + TextSaverUtils.MapText(src.Span, ref sb, Sep); } - protected void MapTimeSpan(ref DvTimeSpan src, ref StringBuilder sb) + protected void MapTimeSpan(ref TimeSpan src, ref StringBuilder sb) { TextSaverUtils.MapTimeSpan(ref src, ref sb); } - protected void MapDateTime(ref DvDateTime src, ref StringBuilder sb) + protected void MapDateTime(ref DateTime src, ref StringBuilder sb) { TextSaverUtils.MapDateTime(ref src, ref sb); } - protected void MapDateTimeZone(ref DvDateTimeZone src, ref StringBuilder sb) + protected void MapDateTimeZone(ref DateTimeOffset src, ref StringBuilder sb) { TextSaverUtils.MapDateTimeZone(ref src, ref sb); } @@ -145,7 +145,7 @@ private sealed class VecValueWriter : ValueWriterBase { private readonly ValueGetter> _getSrc; private VBuffer _src; - private readonly VBuffer _slotNames; + private readonly VBuffer> _slotNames; private readonly int _slotCount; public VecValueWriter(IRowCursor cursor, VectorType type, int source, char sep) @@ -225,7 +225,7 @@ public override void WriteData(Action appendItem, out int le public override void WriteHeader(Action appendItem, out int length) { - var span = new DvText(_columnName); + var span = _columnName.AsMemory(); 
MapText(ref span, ref Sb); appendItem(Sb, 0); length = 1; @@ -796,29 +796,28 @@ private void WriteDenseTo(int dstLim, string defaultStr = null) internal static class TextSaverUtils { /// - /// Converts a DvText to a StringBuilder using TextSaver escaping and string quoting rules. + /// Converts a ReadOnlySpan to a StringBuilder using TextSaver escaping and string quoting rules. /// - internal static void MapText(ref DvText src, ref StringBuilder sb, char sep) + internal static void MapText(ReadOnlySpan span, ref StringBuilder sb, char sep) { if (sb == null) sb = new StringBuilder(); else sb.Clear(); - if (src.IsEmpty) + if (span.IsEmpty) sb.Append("\"\""); - else if (!src.IsNA) + else { - int ichMin; - int ichLim; - string text = src.GetRawUnderlyingBufferInfo(out ichMin, out ichLim); + int ichMin = 0; + int ichLim = span.Length; int ichCur = ichMin; int ichRun = ichCur; bool quoted = false; // Strings that start with space need to be quoted. Contracts.Assert(ichCur < ichLim); - if (text[ichCur] == ' ') + if (span[ichCur] == ' ') { quoted = true; sb.Append('"'); @@ -826,7 +825,7 @@ internal static void MapText(ref DvText src, ref StringBuilder sb, char sep) for (; ichCur < ichLim; ichCur++) { - char ch = text[ichCur]; + char ch = span[ichCur]; if (ch != '"' && ch != sep && ch != ':') continue; if (!quoted) @@ -838,47 +837,47 @@ internal static void MapText(ref DvText src, ref StringBuilder sb, char sep) if (ch == '"') { if (ichRun < ichCur) - sb.Append(text, ichRun, ichCur - ichRun); + sb.AppendSpan(span.Slice(ichRun, ichCur - ichRun)); sb.Append("\"\""); ichRun = ichCur + 1; } } Contracts.Assert(ichCur == ichLim); if (ichRun < ichCur) - sb.Append(text, ichRun, ichCur - ichRun); + sb.AppendSpan(span.Slice(ichRun, ichCur - ichRun)); if (quoted) sb.Append('"'); } } - internal static void MapTimeSpan(ref DvTimeSpan src, ref StringBuilder sb) + internal static void MapTimeSpan(ref TimeSpan src, ref StringBuilder sb) { if (sb == null) sb = new StringBuilder(); else 
sb.Clear(); - if (!src.IsNA) - sb.AppendFormat("\"{0:c}\"", (TimeSpan)src); + + sb.AppendFormat("\"{0:c}\"", src); } - internal static void MapDateTime(ref DvDateTime src, ref StringBuilder sb) + internal static void MapDateTime(ref DateTime src, ref StringBuilder sb) { if (sb == null) sb = new StringBuilder(); else sb.Clear(); - if (!src.IsNA) - sb.AppendFormat("\"{0:o}\"", (DateTime)src); + + sb.AppendFormat("\"{0:o}\"", src); } - internal static void MapDateTimeZone(ref DvDateTimeZone src, ref StringBuilder sb) + internal static void MapDateTimeZone(ref DateTimeOffset src, ref StringBuilder sb) { if (sb == null) sb = new StringBuilder(); else sb.Clear(); - if (!src.IsNA) - sb.AppendFormat("\"{0:o}\"", (DateTimeOffset)src); + + sb.AppendFormat("\"{0:o}\"", src); } } } diff --git a/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs b/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs index 098c652203..98b0dba355 100644 --- a/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs +++ b/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs @@ -21,8 +21,8 @@ public sealed class ArrayDataViewBuilder private readonly IHost _host; private readonly List _columns; private readonly List _names; - private readonly Dictionary>> _getSlotNames; - private readonly Dictionary>> _getKeyValues; + private readonly Dictionary>>> _getSlotNames; + private readonly Dictionary>>> _getKeyValues; private int? RowCount { @@ -41,8 +41,8 @@ public ArrayDataViewBuilder(IHostEnvironment env) _columns = new List(); _names = new List(); - _getSlotNames = new Dictionary>>(); - _getKeyValues = new Dictionary>>(); + _getSlotNames = new Dictionary>>>(); + _getKeyValues = new Dictionary>>>(); } /// @@ -62,7 +62,7 @@ private void CheckLength(string name, T[] values) /// by being assigned. 
Output values are returned simply by being assigned, so the /// type should be a type where assigning to a different /// value does not compromise the immutability of the source object (so, for example, - /// a scalar, string, or DvText would be perfectly acceptable, but a + /// a scalar, string, or ReadOnlyMemory would be perfectly acceptable, but a /// HashSet or VBuffer would not be). /// public void AddColumn(string name, PrimitiveType type, params T[] values) @@ -77,7 +77,7 @@ public void AddColumn(string name, PrimitiveType type, params T[] values) /// Constructs a new key column from an array where values are copied to output simply /// by being assigned. /// - public void AddColumn(string name, ValueGetter> getKeyValues, ulong keyMin, int keyCount, params uint[] values) + public void AddColumn(string name, ValueGetter>> getKeyValues, ulong keyMin, int keyCount, params uint[] values) { _host.CheckValue(getKeyValues, nameof(getKeyValues)); _host.CheckParam(keyCount > 0, nameof(keyCount)); @@ -90,7 +90,7 @@ public void AddColumn(string name, ValueGetter> getKeyValues, ul /// /// Creates a column with slot names from arrays. The added column will be re-interpreted as a buffer. /// - public void AddColumn(string name, ValueGetter> getNames, PrimitiveType itemType, params T[][] values) + public void AddColumn(string name, ValueGetter>> getNames, PrimitiveType itemType, params T[][] values) { _host.CheckValue(getNames, nameof(getNames)); _host.CheckParam(itemType != null && itemType.RawType == typeof(T), nameof(itemType)); @@ -115,7 +115,7 @@ public void AddColumn(string name, PrimitiveType itemType, params T[][] value /// /// Creates a column with slot names from arrays. The added column will be re-interpreted as a buffer and possibly sparsified. 
/// - public void AddColumn(string name, ValueGetter> getNames, PrimitiveType itemType, Combiner combiner, params T[][] values) + public void AddColumn(string name, ValueGetter>> getNames, PrimitiveType itemType, Combiner combiner, params T[][] values) { _host.CheckValue(getNames, nameof(getNames)); _host.CheckParam(itemType != null && itemType.RawType == typeof(T), nameof(itemType)); @@ -151,7 +151,7 @@ public void AddColumn(string name, PrimitiveType itemType, params VBuffer[ /// /// Adds a VBuffer{T} valued column. /// - public void AddColumn(string name, ValueGetter> getNames, PrimitiveType itemType, params VBuffer[] values) + public void AddColumn(string name, ValueGetter>> getNames, PrimitiveType itemType, params VBuffer[] values) { _host.CheckValue(getNames, nameof(getNames)); _host.CheckParam(itemType != null && itemType.RawType == typeof(T), nameof(itemType)); @@ -162,7 +162,7 @@ public void AddColumn(string name, ValueGetter> getNames, Pri } /// - /// Adds a DvText valued column from an array of strings. + /// Adds a ReadOnlyMemory valued column from an array of strings. 
/// public void AddColumn(string name, params string[] values) { @@ -196,8 +196,8 @@ private class SchemaImpl : ISchema private readonly ColumnType[] _columnTypes; private readonly string[] _names; private readonly Dictionary _name2col; - private readonly Dictionary>> _getSlotNamesDict; - private readonly Dictionary>> _getKeyValuesDict; + private readonly Dictionary>>> _getSlotNamesDict; + private readonly Dictionary>>> _getKeyValuesDict; public SchemaImpl(IExceptionContext ectx, ColumnType[] columnTypes, string[] names, ArrayDataViewBuilder builder) { @@ -268,25 +268,25 @@ public void GetMetadata(string kind, int col, ref TValue value) _ectx.CheckParam(0 <= col && col < ColumnCount, nameof(col)); if (kind == MetadataUtils.Kinds.SlotNames && _getSlotNamesDict.ContainsKey(_names[col])) - MetadataUtils.Marshal, TValue>(GetSlotNames, col, ref value); + MetadataUtils.Marshal>, TValue>(GetSlotNames, col, ref value); else if (kind == MetadataUtils.Kinds.KeyValues && _getKeyValuesDict.ContainsKey(_names[col])) - MetadataUtils.Marshal, TValue>(GetKeyValues, col, ref value); + MetadataUtils.Marshal>, TValue>(GetKeyValues, col, ref value); else throw MetadataUtils.ExceptGetMetadata(); } - private void GetSlotNames(int col, ref VBuffer dst) + private void GetSlotNames(int col, ref VBuffer> dst) { Contracts.Assert(_getSlotNamesDict.ContainsKey(_names[col])); - ValueGetter> get; + ValueGetter>> get; _getSlotNamesDict.TryGetValue(_names[col], out get); get(ref dst); } - private void GetKeyValues(int col, ref VBuffer dst) + private void GetKeyValues(int col, ref VBuffer> dst) { Contracts.Assert(_getKeyValuesDict.ContainsKey(_names[col])); - ValueGetter> get; + ValueGetter>> get; _getKeyValuesDict.TryGetValue(_names[col], out get); get(ref dst); } @@ -514,16 +514,16 @@ protected override void CopyOut(ref T src, ref T dst) /// /// A convenience column for converting strings into textspans. 
/// - private sealed class StringToTextColumn : Column + private sealed class StringToTextColumn : Column> { public StringToTextColumn(string[] values) : base(TextType.Instance, values) { } - protected override void CopyOut(ref string src, ref DvText dst) + protected override void CopyOut(ref string src, ref ReadOnlyMemory dst) { - dst = new DvText(src); + dst = src.AsMemory(); } } diff --git a/src/Microsoft.ML.Data/DataView/LambdaColumnMapper.cs b/src/Microsoft.ML.Data/DataView/LambdaColumnMapper.cs index 50602ca96b..26f9c2ade7 100644 --- a/src/Microsoft.ML.Data/DataView/LambdaColumnMapper.cs +++ b/src/Microsoft.ML.Data/DataView/LambdaColumnMapper.cs @@ -18,7 +18,7 @@ public static class LambdaColumnMapper // REVIEW: It would be nice to support propagation of select metadata. public static IDataView Create(IHostEnvironment env, string name, IDataView input, string src, string dst, ColumnType typeSrc, ColumnType typeDst, ValueMapper mapper, - ValueGetter> keyValueGetter = null, ValueGetter> slotNamesGetter = null) + ValueGetter>> keyValueGetter = null, ValueGetter>> slotNamesGetter = null) { Contracts.CheckValue(env, nameof(env)); env.CheckNonEmpty(name, nameof(name)); @@ -69,7 +69,7 @@ public static IDataView Create(IHostEnvironment env, string name, ID else { Func, - ValueMapper, ValueGetter>, ValueGetter>, + ValueMapper, ValueGetter>>, ValueGetter>>, Impl> del = CreateImpl; var meth = del.GetMethodInfo().GetGenericMethodDefinition() .MakeGenericMethod(typeOrig.RawType, typeof(TSrc), typeof(TDst)); @@ -82,7 +82,7 @@ public static IDataView Create(IHostEnvironment env, string name, ID private static Impl CreateImpl( IHostEnvironment env, string name, IDataView input, Column col, ColumnType typeDst, ValueMapper map1, ValueMapper map2, - ValueGetter> keyValueGetter, ValueGetter> slotNamesGetter) + ValueGetter>> keyValueGetter, ValueGetter>> slotNamesGetter) { return new Impl(env, name, input, col, typeDst, map1, map2, keyValueGetter); } @@ -104,7 +104,7 @@ private 
sealed class Impl : OneToOneTransformBase public Impl(IHostEnvironment env, string name, IDataView input, OneToOneColumn col, ColumnType typeDst, ValueMapper map1, ValueMapper map2 = null, - ValueGetter> keyValueGetter = null, ValueGetter> slotNamesGetter = null) + ValueGetter>> keyValueGetter = null, ValueGetter>> slotNamesGetter = null) : base(env, name, new[] { col }, input, x => null) { Host.Assert(typeDst.RawType == typeof(T3)); @@ -122,15 +122,15 @@ public Impl(IHostEnvironment env, string name, IDataView input, OneToOneColumn c if (keyValueGetter != null) { Host.Assert(_typeDst.ItemType.KeyCount > 0); - MetadataUtils.MetadataGetter> mdGetter = - (int c, ref VBuffer dst) => keyValueGetter(ref dst); + MetadataUtils.MetadataGetter>> mdGetter = + (int c, ref VBuffer> dst) => keyValueGetter(ref dst); bldr.AddGetter(MetadataUtils.Kinds.KeyValues, new VectorType(TextType.Instance, _typeDst.ItemType.KeyCount), mdGetter); } if (slotNamesGetter != null) { Host.Assert(_typeDst.VectorSize > 0); - MetadataUtils.MetadataGetter> mdGetter = - (int c, ref VBuffer dst) => slotNamesGetter(ref dst); + MetadataUtils.MetadataGetter>> mdGetter = + (int c, ref VBuffer> dst) => slotNamesGetter(ref dst); bldr.AddGetter(MetadataUtils.Kinds.SlotNames, new VectorType(TextType.Instance, _typeDst.VectorSize), mdGetter); } } diff --git a/src/Microsoft.ML.Data/DataView/SimpleRow.cs b/src/Microsoft.ML.Data/DataView/SimpleRow.cs index 28700c59f9..b94f8f90ec 100644 --- a/src/Microsoft.ML.Data/DataView/SimpleRow.cs +++ b/src/Microsoft.ML.Data/DataView/SimpleRow.cs @@ -70,7 +70,7 @@ public sealed class SimpleSchema : ISchema private readonly string[] _names; private readonly ColumnType[] _types; private readonly Dictionary _columnNameMap; - private readonly MetadataUtils.MetadataGetter>[] _keyValueGetters; + private readonly MetadataUtils.MetadataGetter>>[] _keyValueGetters; public int ColumnCount => _types.Length; @@ -91,10 +91,10 @@ public SimpleSchema(IExceptionContext ectx, params 
KeyValuePair>[ColumnCount]; + _keyValueGetters = new MetadataUtils.MetadataGetter>>[ColumnCount]; } - public SimpleSchema(IExceptionContext ectx, KeyValuePair[] columns, Dictionary>> keyValues) + public SimpleSchema(IExceptionContext ectx, KeyValuePair[] columns, Dictionary>>> keyValues) : this(ectx, columns) { foreach (var kvp in keyValues) diff --git a/src/Microsoft.ML.Data/Depricated/Instances/HeaderSchema.cs b/src/Microsoft.ML.Data/Depricated/Instances/HeaderSchema.cs index f08d52fe85..58ba66e091 100644 --- a/src/Microsoft.ML.Data/Depricated/Instances/HeaderSchema.cs +++ b/src/Microsoft.ML.Data/Depricated/Instances/HeaderSchema.cs @@ -27,7 +27,7 @@ private sealed class FeatureNameCollectionSchema : ISchema private readonly FeatureNameCollection _collection; - private readonly MetadataUtils.MetadataGetter> _getSlotNames; + private readonly MetadataUtils.MetadataGetter>> _getSlotNames; public int ColumnCount => 1; @@ -86,21 +86,21 @@ public bool TryGetColumnIndex(string name, out int col) return name == RoleMappedSchema.ColumnRole.Feature.Value; } - private void GetSlotNames(int col, ref VBuffer dst) + private void GetSlotNames(int col, ref VBuffer> dst) { Contracts.Assert(col == 0); - var nameList = new List(); + var nameList = new List>(); var indexList = new List(); foreach (var kvp in _collection.GetNonDefaultFeatureNames()) { - nameList.Add(new DvText(kvp.Value)); + nameList.Add(kvp.Value.AsMemory()); indexList.Add(kvp.Key); } var vals = dst.Values; if (Utils.Size(vals) < nameList.Count) - vals = new DvText[nameList.Count]; + vals = new ReadOnlyMemory[nameList.Count]; Array.Copy(nameList.ToArray(), vals, nameList.Count); if (nameList.Count < _collection.Count) { @@ -108,10 +108,10 @@ private void GetSlotNames(int col, ref VBuffer dst) if (Utils.Size(indices) < indexList.Count) indices = new int[indexList.Count]; Array.Copy(indexList.ToArray(), indices, indexList.Count); - dst = new VBuffer(_collection.Count, nameList.Count, vals, indices); + dst = new 
VBuffer>(_collection.Count, nameList.Count, vals, indices); } else - dst = new VBuffer(_collection.Count, vals, dst.Indices); + dst = new VBuffer>(_collection.Count, vals, dst.Indices); } } @@ -193,15 +193,15 @@ public static FeatureNameCollection Create(RoleMappedSchema schema) Contracts.CheckParam(schema.Feature != null, nameof(schema), "Cannot create feature name collection if we have no features"); Contracts.CheckParam(schema.Feature.Type.ValueCount > 0, nameof(schema), "Cannot create feature name collection if our features are not of known size"); - VBuffer slotNames = default(VBuffer); + VBuffer> slotNames = default; int len = schema.Feature.Type.ValueCount; if (schema.Schema.HasSlotNames(schema.Feature.Index, len)) schema.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, schema.Feature.Index, ref slotNames); else - slotNames = VBufferUtils.CreateEmpty(len); + slotNames = VBufferUtils.CreateEmpty>(len); string[] names = new string[slotNames.Count]; for (int i = 0; i < slotNames.Count; ++i) - names[i] = slotNames.Values[i].HasChars ? slotNames.Values[i].ToString() : null; + names[i] = !slotNames.Values[i].IsEmpty ? slotNames.Values[i].ToString() : null; if (slotNames.IsDense) return new Dense(names.Length, names); @@ -225,7 +225,7 @@ private static VersionInfo GetVersionInfo() loaderSignature: LoaderSignature); } - public static void Save(ModelSaveContext ctx, ref VBuffer names) + public static void Save(ModelSaveContext ctx, ref VBuffer> names) { Contracts.AssertValue(ctx); ctx.CheckAtModel(); diff --git a/src/Microsoft.ML.Data/EntryPoints/PredictorModel.cs b/src/Microsoft.ML.Data/EntryPoints/PredictorModel.cs index 055b2fa299..604cb71977 100644 --- a/src/Microsoft.ML.Data/EntryPoints/PredictorModel.cs +++ b/src/Microsoft.ML.Data/EntryPoints/PredictorModel.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System; using System.Collections.Generic; using System.IO; using System.Linq; @@ -128,7 +129,7 @@ public string[] GetLabelInfo(IHostEnvironment env, out ColumnType labelType) if (labelType.IsKey && trainRms.Schema.HasKeyNames(trainRms.Label.Index, labelType.KeyCount)) { - VBuffer keyValues = default(VBuffer); + VBuffer> keyValues = default; trainRms.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, trainRms.Label.Index, ref keyValues); return keyValues.DenseValues().Select(v => v.ToString()).ToArray(); diff --git a/src/Microsoft.ML.Data/EntryPoints/ScoreColumnSelector.cs b/src/Microsoft.ML.Data/EntryPoints/ScoreColumnSelector.cs index c3ebc54678..65bc1daf7b 100644 --- a/src/Microsoft.ML.Data/EntryPoints/ScoreColumnSelector.cs +++ b/src/Microsoft.ML.Data/EntryPoints/ScoreColumnSelector.cs @@ -89,10 +89,10 @@ public static CommonOutputs.TransformOutput RenameBinaryPredictionScoreColumns(I if (!ShouldAddColumn(input.Data.Schema, i, null, maxScoreId)) continue; // Do not rename the PredictedLabel column. 
- DvText tmp = default(DvText); + ReadOnlyMemory tmp = default; if (input.Data.Schema.TryGetMetadata(TextType.Instance, MetadataUtils.Kinds.ScoreValueKind, i, ref tmp) - && tmp.EqualsStr(MetadataUtils.Const.ScoreValueKind.PredictedLabel)) + && ReadOnlyMemoryUtils.EqualsStr(MetadataUtils.Const.ScoreValueKind.PredictedLabel, tmp)) { continue; } diff --git a/src/Microsoft.ML.Data/Evaluators/AnomalyDetectionEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/AnomalyDetectionEvaluator.cs index 8e4f3be56c..d9f8066ecc 100644 --- a/src/Microsoft.ML.Data/Evaluators/AnomalyDetectionEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/AnomalyDetectionEvaluator.cs @@ -125,10 +125,10 @@ public override IEnumerable GetOverallMetricColumns() } protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, AggregatorDictionaryBase[] dictionaries, - out Action addAgg, out Func> consolidate) + out Action, Aggregator> addAgg, out Func> consolidate) { var stratCol = new List(); - var stratVal = new List(); + var stratVal = new List>(); var auc = new List(); var drAtK = new List(); var drAtP = new List(); @@ -136,13 +136,13 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A var thresholdAtK = new List(); var thresholdAtP = new List(); var thresholdAtNumAnomalies = new List(); - var numAnoms = new List(); + var numAnoms = new List(); var scores = new List(); var labels = new List(); - var names = new List(); + var names = new List>(); var topKStratCol = new List(); - var topKStratVal = new List(); + var topKStratVal = new List>(); bool hasStrats = Utils.Size(dictionaries) > 0; @@ -438,9 +438,9 @@ private struct TopExamplesInfo private ValueGetter _labelGetter; private ValueGetter _scoreGetter; - private ValueGetter _nameGetter; + private ValueGetter> _nameGetter; - public readonly DvText[] Names; + public readonly ReadOnlyMemory[] Names; public readonly Single[] Scores; public readonly Single[] Labels; public int NumTopExamples; @@ -464,7 
+464,7 @@ public Aggregator(IHostEnvironment env, int reservoirSize, int topK, int k, Doub AggCounters = new TwoPassCounters(_k, _p); _aucAggregator = new UnweightedAucAggregator(Host.Rand, reservoirSize); - Names = new DvText[_topK]; + Names = new ReadOnlyMemory[_topK]; Scores = new Single[_topK]; Labels = new Single[_topK]; } @@ -491,7 +491,7 @@ private void FinishOtherMetrics() NumTopExamples = _topExamples.Count; while (_topExamples.Count > 0) { - Names[_topExamples.Count - 1] = new DvText(_topExamples.Top.Name); + Names[_topExamples.Count - 1] = _topExamples.Top.Name.AsMemory(); Scores[_topExamples.Count - 1] = _topExamples.Top.Score; Labels[_topExamples.Count - 1] = _topExamples.Top.Label; _topExamples.Pop(); @@ -516,10 +516,10 @@ public override void InitializeNextPass(IRow row, RoleMappedSchema schema) if (_nameIndex < 0) { int rowCounter = 0; - _nameGetter = (ref DvText dst) => dst = new DvText((rowCounter++).ToString()); + _nameGetter = (ref ReadOnlyMemory dst) => dst = (rowCounter++).ToString().AsMemory(); } else - _nameGetter = row.GetGetter(_nameIndex); + _nameGetter = row.GetGetter>(_nameIndex); } } @@ -552,7 +552,7 @@ public override void ProcessRow() _aucAggregator.ProcessRow(label, score); AggCounters.Update(label, score); - var name = default(DvText); + var name = default(ReadOnlyMemory); _nameGetter(ref name); if (_topExamples.Count >= _topK) { @@ -632,7 +632,7 @@ protected override void PrintFoldResultsCore(IChannel ch, Dictionary(index); + var instanceGetter = cursor.GetGetter>(index); if (!top.Schema.TryGetColumnIndex(AnomalyDetectionEvaluator.TopKResultsColumns.AnomalyScore, out index)) throw Host.Except("Data view does not contain the 'Anomaly Score' column"); var scoreGetter = cursor.GetGetter(index); @@ -651,7 +651,7 @@ protected override void PrintFoldResultsCore(IChannel ch, Dictionary); Single score = 0; Single label = 0; instanceGetter(ref name); @@ -678,11 +678,11 @@ protected override void PrintFoldResultsCore(IChannel ch, Dictionary 
col == numAnomIndex || (hasStrat && col == stratCol))) { - var numAnomGetter = cursor.GetGetter(numAnomIndex); + var numAnomGetter = cursor.GetGetter(numAnomIndex); ValueGetter stratGetter = null; if (hasStrat) { diff --git a/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs index a567abef39..a92526dfa9 100644 --- a/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. @@ -170,11 +170,11 @@ protected override Aggregator GetAggregatorCore(RoleMappedSchema schema, string return new Aggregator(Host, classNames, schema.Weight != null, _aucCount, _auPrcCount, _threshold, _useRaw, _prCount, stratName); } - private DvText[] GetClassNames(RoleMappedSchema schema) + private ReadOnlyMemory[] GetClassNames(RoleMappedSchema schema) { // Get the label names if they exist, or use the default names. 
ColumnType type; - var labelNames = default(VBuffer); + var labelNames = default(VBuffer>); if (schema.Label.Type.IsKey && (type = schema.Schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.KeyValues, schema.Label.Index)) != null && type.ItemType.IsKnownSizeVector && type.ItemType.IsText) @@ -182,8 +182,9 @@ private DvText[] GetClassNames(RoleMappedSchema schema) schema.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, schema.Label.Index, ref labelNames); } else - labelNames = new VBuffer(2, new[] { new DvText("positive"), new DvText("negative") }); - DvText[] names = new DvText[2]; + labelNames = new VBuffer>(2, new[] { "positive".AsMemory(), "negative".AsMemory() }); + + ReadOnlyMemory[] names = new ReadOnlyMemory[2]; labelNames.CopyTo(names); return names; } @@ -216,11 +217,11 @@ public override IEnumerable GetOverallMetricColumns() } protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, AggregatorDictionaryBase[] dictionaries, - out Action addAgg, out Func> consolidate) + out Action, Aggregator> addAgg, out Func> consolidate) { var stratCol = new List(); - var stratVal = new List(); - var isWeighted = new List(); + var stratVal = new List>(); + var isWeighted = new List(); var auc = new List(); var accuracy = new List(); var posPrec = new List(); @@ -236,7 +237,7 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A var counts = new List(); var weights = new List(); var confStratCol = new List(); - var confStratVal = new List(); + var confStratVal = new List>(); var scores = new List(); var precision = new List(); @@ -246,7 +247,7 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A var weightedRecall = new List(); var weightedFpr = new List(); var prStratCol = new List(); - var prStratVal = new List(); + var prStratVal = new List>(); bool hasStrats = Utils.Size(dictionaries) > 0; bool hasWeight = aggregator.Weighted; @@ -261,7 +262,7 @@ protected override void 
GetAggregatorConsolidationFuncs(Aggregator aggregator, A agg.Finish(); stratCol.Add(stratColKey); stratVal.Add(stratColVal); - isWeighted.Add(DvBool.False); + isWeighted.Add(false); auc.Add(agg.UnweightedAuc); accuracy.Add(agg.UnweightedCounters.Acc); posPrec.Add(agg.UnweightedCounters.PrecisionPos); @@ -300,7 +301,7 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A { stratCol.Add(stratColKey); stratVal.Add(stratColVal); - isWeighted.Add(DvBool.True); + isWeighted.Add(true); auc.Add(agg.WeightedAuc); accuracy.Add(agg.WeightedCounters.Acc); posPrec.Add(agg.WeightedCounters.PrecisionPos); @@ -359,9 +360,9 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A confDvBldr.AddColumn(MetricKinds.ColumnNames.StratCol, GetKeyValueGetter(dictionaries), 0, dictionaries.Length, confStratCol.ToArray()); confDvBldr.AddColumn(MetricKinds.ColumnNames.StratVal, TextType.Instance, confStratVal.ToArray()); } - ValueGetter> getSlotNames = - (ref VBuffer dst) => - dst = new VBuffer(aggregator.ClassNames.Length, aggregator.ClassNames); + ValueGetter>> getSlotNames = + (ref VBuffer> dst) => + dst = new VBuffer>(aggregator.ClassNames.Length, aggregator.ClassNames); confDvBldr.AddColumn(MetricKinds.ColumnNames.Count, getSlotNames, NumberType.R8, counts.ToArray()); if (hasWeight) @@ -549,9 +550,9 @@ private struct RocInfo private Single _label; private Single _weight; - public readonly DvText[] ClassNames; + public readonly ReadOnlyMemory[] ClassNames; - public Aggregator(IHostEnvironment env, DvText[] classNames, bool weighted, int aucReservoirSize, + public Aggregator(IHostEnvironment env, ReadOnlyMemory[] classNames, bool weighted, int aucReservoirSize, int auPrcReservoirSize, Single threshold, bool useRaw, int prCount, string stratName) : base(env, stratName) { @@ -1165,7 +1166,7 @@ public override Delegate[] CreateGetters(IRow input, Func activeCols, scoreGetter = nanGetter; Action updateCacheIfNeeded; - Func 
getPredictedLabel; + Func getPredictedLabel; if (_useRaw) { updateCacheIfNeeded = @@ -1199,8 +1200,8 @@ public override Delegate[] CreateGetters(IRow input, Func activeCols, var getters = _probIndex >= 0 ? new Delegate[2] : new Delegate[1]; if (activeCols(AssignedCol)) { - ValueGetter predFn = - (ref DvBool dst) => + ValueGetter predFn = + (ref bool dst) => { updateCacheIfNeeded(); dst = getPredictedLabel(); @@ -1229,9 +1230,10 @@ private Double GetLogLoss(Single prob, Single label) return -Math.Log(1.0 - prob, 2); } - private DvBool GetPredictedLabel(Single val) + private bool GetPredictedLabel(Single val) { - return val.IsNA() ? DvBool.NA : val > _threshold ? DvBool.True : DvBool.False; + //Behavior for NA values is undefined. + return Single.IsNaN(val) ? false : val > _threshold; } public override RowMapperColumnInfo[] GetOutputColumns() diff --git a/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs index bec1ac144a..fbbafa8775 100644 --- a/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
@@ -115,11 +115,11 @@ public override IEnumerable GetOverallMetricColumns() } protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, AggregatorDictionaryBase[] dictionaries, - out Action addAgg, out Func> consolidate) + out Action, Aggregator> addAgg, out Func> consolidate) { var stratCol = new List(); - var stratVal = new List(); - var isWeighted = new List(); + var stratVal = new List>(); + var isWeighted = new List(); var nmi = new List(); var avgMinScores = new List(); var dbi = new List(); @@ -136,7 +136,7 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A stratCol.Add(stratColKey); stratVal.Add(stratColVal); - isWeighted.Add(DvBool.False); + isWeighted.Add(false); nmi.Add(agg.UnweightedCounters.Nmi); avgMinScores.Add(agg.UnweightedCounters.AvgMinScores); if (agg.UnweightedCounters.CalculateDbi) @@ -145,7 +145,7 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A { stratCol.Add(stratColKey); stratVal.Add(stratColVal); - isWeighted.Add(DvBool.True); + isWeighted.Add(true); nmi.Add(agg.WeightedCounters.Nmi); avgMinScores.Add(agg.WeightedCounters.AvgMinScores); if (agg.WeightedCounters.CalculateDbi) @@ -685,10 +685,10 @@ public override RowMapperColumnInfo[] GetOutputColumns() var slotNamesType = new VectorType(TextType.Instance, _numClusters); var sortedClusters = new ColumnMetadataInfo(SortedClusters); - sortedClusters.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>(slotNamesType, + sortedClusters.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>>(slotNamesType, CreateSlotNamesGetter(_numClusters, "Cluster"))); var sortedClusterScores = new ColumnMetadataInfo(SortedClusterScores); - sortedClusterScores.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>(slotNamesType, + sortedClusterScores.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>>(slotNamesType, CreateSlotNamesGetter(_numClusters, "Score"))); infos[SortedClusterCol] = new 
RowMapperColumnInfo(SortedClusters, _types[SortedClusterCol], sortedClusters); @@ -698,17 +698,17 @@ public override RowMapperColumnInfo[] GetOutputColumns() } // REVIEW: Figure out how to avoid having the column name in each slot name. - private MetadataUtils.MetadataGetter> CreateSlotNamesGetter(int numTopClusters, string suffix) + private MetadataUtils.MetadataGetter>> CreateSlotNamesGetter(int numTopClusters, string suffix) { return - (int col, ref VBuffer dst) => + (int col, ref VBuffer> dst) => { var values = dst.Values; if (Utils.Size(values) < numTopClusters) - values = new DvText[numTopClusters]; + values = new ReadOnlyMemory[numTopClusters]; for (int i = 1; i <= numTopClusters; i++) - values[i - 1] = new DvText(string.Format("#{0} {1}", i, suffix)); - dst = new VBuffer(numTopClusters, values); + values[i - 1] = string.Format("#{0} {1}", i, suffix).AsMemory(); + dst = new VBuffer>(numTopClusters, values); }; } diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorBase.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorBase.cs index c628cff1e4..91e5f77d0e 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorBase.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorBase.cs @@ -167,18 +167,18 @@ private Dictionary ProcessData(IDataView data, RoleMappedSche needMorePasses = finishPass(); } - Action addAgg; + Action, TAgg> addAgg; Func> consolidate; GetAggregatorConsolidationFuncs(aggregator, dictionaries, out addAgg, out consolidate); uint stratColKey = 0; - addAgg(stratColKey, DvText.NA, aggregator); + addAgg(stratColKey, default, aggregator); for (int i = 0; i < Utils.Size(dictionaries); i++) { var dict = dictionaries[i]; stratColKey++; foreach (var agg in dict.GetAll()) - addAgg(stratColKey, new DvText(agg.StratName), agg); + addAgg(stratColKey, agg.StratName.AsMemory(), agg); } return consolidate(); } @@ -192,21 +192,21 @@ private Dictionary ProcessData(IDataView data, RoleMappedSche /// the dictionary of metric data views. 
/// protected abstract void GetAggregatorConsolidationFuncs(TAgg aggregator, AggregatorDictionaryBase[] dictionaries, - out Action addAgg, out Func> consolidate); + out Action, TAgg> addAgg, out Func> consolidate); - protected ValueGetter> GetKeyValueGetter(AggregatorDictionaryBase[] dictionaries) + protected ValueGetter>> GetKeyValueGetter(AggregatorDictionaryBase[] dictionaries) { if (Utils.Size(dictionaries) == 0) return null; return - (ref VBuffer dst) => + (ref VBuffer> dst) => { var values = dst.Values; if (Utils.Size(values) < dictionaries.Length) - values = new DvText[dictionaries.Length]; + values = new ReadOnlyMemory[dictionaries.Length]; for (int i = 0; i < dictionaries.Length; i++) - values[i] = new DvText(dictionaries[i].ColName); - dst = new VBuffer(dictionaries.Length, values, dst.Indices); + values[i] = dictionaries[i].ColName.AsMemory(); + dst = new VBuffer>(dictionaries.Length, values, dst.Indices); }; } @@ -296,7 +296,7 @@ public void GetWarnings(Dictionary dict, IHostEnvironment env { var dvBldr = new ArrayDataViewBuilder(env); dvBldr.AddColumn(MetricKinds.ColumnNames.WarningText, TextType.Instance, - warnings.Select(s => new DvText(s)).ToArray()); + warnings.Select(s => s.AsMemory()).ToArray()); dict.Add(MetricKinds.Warnings, dvBldr.GetDataView()); } } diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs index 5244762d90..ffc8820e1a 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs @@ -55,7 +55,7 @@ public static Dictionary> Instanc public static IMamlEvaluator GetEvaluator(IHostEnvironment env, ISchema schema) { Contracts.CheckValueOrNull(env); - DvText tmp = default; + ReadOnlyMemory tmp = default; schema.GetMaxMetadataKind(out int col, MetadataUtils.Kinds.ScoreColumnSetId, CheckScoreColumnKindIsKnown); if (col >= 0) { @@ -83,7 +83,7 @@ private static bool CheckScoreColumnKindIsKnown(ISchema schema, int col) 
var columnType = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.ScoreColumnKind, col); if (columnType == null || !columnType.IsText) return false; - DvText tmp = default(DvText); + ReadOnlyMemory tmp = default; schema.GetMetadata(MetadataUtils.Kinds.ScoreColumnKind, col, ref tmp); var map = DefaultEvaluatorTable.Instance; return map.ContainsKey(tmp.ToString()); @@ -125,18 +125,18 @@ public static ColumnInfo GetScoreColumnInfo(IExceptionContext ectx, ISchema sche var maxSetNum = schema.GetMaxMetadataKind(out colTmp, MetadataUtils.Kinds.ScoreColumnSetId, (s, c) => IsScoreColumnKind(ectx, s, c, kind)); - DvText tmp = default(DvText); + ReadOnlyMemory tmp = default; foreach (var col in schema.GetColumnSet(MetadataUtils.Kinds.ScoreColumnSetId, maxSetNum)) { #if DEBUG schema.GetMetadata(MetadataUtils.Kinds.ScoreColumnKind, col, ref tmp); - ectx.Assert(tmp.EqualsStr(kind)); + ectx.Assert(ReadOnlyMemoryUtils.EqualsStr(kind, tmp)); #endif // REVIEW: What should this do about hidden columns? Currently we ignore them. if (schema.IsHidden(col)) continue; if (schema.TryGetMetadata(TextType.Instance, MetadataUtils.Kinds.ScoreValueKind, col, ref tmp) && - tmp.EqualsStr(valueKind)) + ReadOnlyMemoryUtils.EqualsStr(valueKind, tmp)) { return ColumnInfo.CreateFromIndex(schema, col); } @@ -187,14 +187,14 @@ public static ColumnInfo GetOptAuxScoreColumnInfo(IExceptionContext ectx, ISchem uint setId = 0; schema.GetMetadata(MetadataUtils.Kinds.ScoreColumnSetId, colScore, ref setId); - DvText tmp = default(DvText); + ReadOnlyMemory tmp = default; foreach (var col in schema.GetColumnSet(MetadataUtils.Kinds.ScoreColumnSetId, setId)) { // REVIEW: What should this do about hidden columns? Currently we ignore them. 
if (schema.IsHidden(col)) continue; if (schema.TryGetMetadata(TextType.Instance, MetadataUtils.Kinds.ScoreValueKind, col, ref tmp) && - tmp.EqualsStr(valueKind)) + ReadOnlyMemoryUtils.EqualsStr(valueKind, tmp)) { var res = ColumnInfo.CreateFromIndex(schema, col); if (testType(res.Type)) @@ -216,9 +216,9 @@ private static bool IsScoreColumnKind(IExceptionContext ectx, ISchema schema, in var type = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.ScoreColumnKind, col); if (type == null || !type.IsText) return false; - var tmp = default(DvText); + var tmp = default(ReadOnlyMemory); schema.GetMetadata(MetadataUtils.Kinds.ScoreColumnKind, col, ref tmp); - return tmp.EqualsStr(kind); + return ReadOnlyMemoryUtils.EqualsStr(kind, tmp); } /// @@ -272,12 +272,12 @@ public static IEnumerable> GetMetrics(IDataView met using (var cursor = metricsView.GetRowCursor(col => true)) { - DvBool isWeighted = DvBool.False; - ValueGetter isWeightedGetter; + bool isWeighted = false; + ValueGetter isWeightedGetter; if (hasWeighted) - isWeightedGetter = cursor.GetGetter(isWeightedCol); + isWeightedGetter = cursor.GetGetter(isWeightedCol); else - isWeightedGetter = (ref DvBool dst) => dst = DvBool.False; + isWeightedGetter = (ref bool dst) => dst = false; ValueGetter stratColGetter; if (hasStrats) @@ -313,7 +313,7 @@ public static IEnumerable> GetMetrics(IDataView met while (cursor.MoveNext()) { isWeightedGetter(ref isWeighted); - if (isWeighted.IsTrue) + if (isWeighted) continue; stratColGetter(ref strat); @@ -341,17 +341,17 @@ public static IEnumerable> GetMetrics(IDataView met // For R8 vector valued columns the names of the metrics are the column name, // followed by the slot name if it exists, or "Label_i" if it doesn't. 
- VBuffer names = default(VBuffer); + VBuffer> names = default; var size = schema.GetColumnType(i).VectorSize; var slotNamesType = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, i); if (slotNamesType != null && slotNamesType.VectorSize == size && slotNamesType.ItemType.IsText) schema.GetMetadata(MetadataUtils.Kinds.SlotNames, i, ref names); else { - var namesArray = new DvText[size]; + var namesArray = new ReadOnlyMemory[size]; for (int j = 0; j < size; j++) - namesArray[j] = new DvText(string.Format("({0})", j)); - names = new VBuffer(size, namesArray); + namesArray[j] = string.Format("({0})", j).AsMemory(); + names = new VBuffer>(size, namesArray); } var colName = schema.GetColumnName(i); foreach (var metric in metricVals.Items(all: true)) @@ -370,7 +370,7 @@ private static IDataView AddTextColumn(IHostEnvironment env, IDataView inp { Contracts.Check(typeSrc.RawType == typeof(TSrc)); return LambdaColumnMapper.Create(env, registrationName, input, inputColName, outputColName, typeSrc, TextType.Instance, - (ref TSrc src, ref DvText dst) => dst = new DvText(value)); + (ref TSrc src, ref ReadOnlyMemory dst) => dst = value.AsMemory()); } /// @@ -400,7 +400,7 @@ public static IDataView AddFoldIndex(IHostEnvironment env, IDataView input, int } private static IDataView AddKeyColumn(IHostEnvironment env, IDataView input, string inputColName, string outputColName, - ColumnType typeSrc, int keyCount, int value, string registrationName, ValueGetter> keyValueGetter) + ColumnType typeSrc, int keyCount, int value, string registrationName, ValueGetter>> keyValueGetter) { Contracts.Check(typeSrc.RawType == typeof(TSrc)); return LambdaColumnMapper.Create(env, registrationName, input, inputColName, outputColName, typeSrc, @@ -439,7 +439,7 @@ public static IDataView AddFoldIndex(IHostEnvironment env, IDataView input, int var inputColType = input.Schema.GetColumnType(inputCol); return Utils.MarshalInvoke(AddKeyColumn, inputColType.RawType, env, input, inputColName, 
MetricKinds.ColumnNames.FoldIndex, - inputColType, numFolds, curFold + 1, "FoldIndex", default(ValueGetter>)); + inputColType, numFolds, curFold + 1, "FoldIndex", default(ValueGetter>>)); } /// @@ -456,9 +456,9 @@ public static IDataView AddFoldIndex(IHostEnvironment env, IDataView input, int Contracts.CheckParam(typeof(T) == itemType.RawType, nameof(itemType), "Generic type does not match the item type"); var numIdvs = views.Length; - var slotNames = new Dictionary(); + var slotNames = new Dictionary(); var maps = new int[numIdvs][]; - var slotNamesCur = default(VBuffer); + var slotNamesCur = default(VBuffer>); var typeSrc = new ColumnType[numIdvs]; // Create mappings from the original slots to the reconciled slots. for (int i = 0; i < numIdvs; i++) @@ -477,23 +477,23 @@ public static IDataView AddFoldIndex(IHostEnvironment env, IDataView input, int foreach (var kvp in slotNamesCur.Items(true)) { var index = kvp.Key; - var name = kvp.Value; + var name = kvp.Value.ToString(); if (!slotNames.ContainsKey(name)) slotNames[name] = slotNames.Count; map[index] = slotNames[name]; } } - var reconciledSlotNames = new VBuffer(slotNames.Count, slotNames.Keys.ToArray()); - ValueGetter> slotNamesGetter = - (ref VBuffer dst) => + var reconciledSlotNames = new VBuffer>(slotNames.Count, slotNames.Keys.Select(k => k.AsMemory()).ToArray()); + ValueGetter>> slotNamesGetter = + (ref VBuffer> dst) => { var values = dst.Values; if (Utils.Size(values) < reconciledSlotNames.Length) - values = new DvText[reconciledSlotNames.Length]; + values = new ReadOnlyMemory[reconciledSlotNames.Length]; Array.Copy(reconciledSlotNames.Values, values, reconciledSlotNames.Length); - dst = new VBuffer(reconciledSlotNames.Length, values, dst.Indices); + dst = new VBuffer>(reconciledSlotNames.Length, values, dst.Indices); }; // For each input data view, create the reconciled key column by wrapping it in a LambdaColumnMapper. 
@@ -553,7 +553,7 @@ public static IDataView AddFoldIndex(IHostEnvironment env, IDataView input, int } private static int[][] MapKeys(ISchema[] schemas, string columnName, bool isVec, - int[] indices, Dictionary reconciledKeyNames) + int[] indices, Dictionary, int> reconciledKeyNames) { Contracts.AssertValue(indices); Contracts.AssertValue(reconciledKeyNames); @@ -582,7 +582,7 @@ private static int[][] MapKeys(ISchema[] schemas, string columnName, bool isV foreach (var kvp in keyNamesCur.Items(true)) { var key = kvp.Key; - var name = new DvText(kvp.Value.ToString()); + var name = kvp.Value.ToString().AsMemory(); if (!reconciledKeyNames.ContainsKey(name)) reconciledKeyNames[name] = reconciledKeyNames.Count; keyValueMappers[i][key] = reconciledKeyNames[name]; @@ -606,14 +606,14 @@ public static void ReconcileKeyValues(IHostEnvironment env, IDataView[] views, s // Create mappings from the original key types to the reconciled key type. var indices = new int[dvCount]; - var keyNames = new Dictionary(); + var keyNames = new Dictionary, int>(); // We use MarshalInvoke so that we can call MapKeys with the correct generic: keyValueType.RawType. var keyValueMappers = Utils.MarshalInvoke(MapKeys, keyValueType.RawType, views.Select(view => view.Schema).ToArray(), columnName, false, indices, keyNames); var keyType = new KeyType(DataKind.U4, 0, keyNames.Count); - var keyNamesVBuffer = new VBuffer(keyNames.Count, keyNames.Keys.ToArray()); - ValueGetter> keyValueGetter = - (ref VBuffer dst) => - dst = new VBuffer(keyNamesVBuffer.Length, keyNamesVBuffer.Count, keyNamesVBuffer.Values, keyNamesVBuffer.Indices); + var keyNamesVBuffer = new VBuffer>(keyNames.Count, keyNames.Keys.ToArray()); + ValueGetter>> keyValueGetter = + (ref VBuffer> dst) => + dst = new VBuffer>(keyNamesVBuffer.Length, keyNamesVBuffer.Count, keyNamesVBuffer.Values, keyNamesVBuffer.Indices); // For each input data view, create the reconciled key column by wrapping it in a LambdaColumnMapper. 
for (int i = 0; i < dvCount; i++) @@ -674,14 +674,14 @@ public static void ReconcileVectorKeyValues(IHostEnvironment env, IDataView[] vi var dvCount = views.Length; - var keyNames = new Dictionary(); + var keyNames = new Dictionary, int>(); var columnIndices = new int[dvCount]; var keyValueMappers = Utils.MarshalInvoke(MapKeys, keyValueType.RawType, views.Select(view => view.Schema).ToArray(), columnName, true, columnIndices, keyNames); var keyType = new KeyType(DataKind.U4, 0, keyNames.Count); - var keyNamesVBuffer = new VBuffer(keyNames.Count, keyNames.Keys.ToArray()); - ValueGetter> keyValueGetter = - (ref VBuffer dst) => - dst = new VBuffer(keyNamesVBuffer.Length, keyNamesVBuffer.Count, keyNamesVBuffer.Values, keyNamesVBuffer.Indices); + var keyNamesVBuffer = new VBuffer>(keyNames.Count, keyNames.Keys.ToArray()); + ValueGetter>> keyValueGetter = + (ref VBuffer> dst) => + dst = new VBuffer>(keyNamesVBuffer.Length, keyNamesVBuffer.Count, keyNamesVBuffer.Values, keyNamesVBuffer.Indices); for (int i = 0; i < dvCount; i++) { @@ -720,14 +720,14 @@ public static void ReconcileVectorKeyValues(IHostEnvironment env, IDataView[] vi } }; - ValueGetter> slotNamesGetter = null; + ValueGetter>> slotNamesGetter = null; var type = views[i].Schema.GetColumnType(columnIndices[i]); if (views[i].Schema.HasSlotNames(columnIndices[i], type.VectorSize)) { var schema = views[i].Schema; int index = columnIndices[i]; slotNamesGetter = - (ref VBuffer dst) => schema.GetMetadata(MetadataUtils.Kinds.SlotNames, index, ref dst); + (ref VBuffer> dst) => schema.GetMetadata(MetadataUtils.Kinds.SlotNames, index, ref dst); } views[i] = LambdaColumnMapper.Create(env, "ReconcileKeyValues", views[i], columnName, columnName, type, new VectorType(keyType, type.AsVector), mapper, keyValueGetter, slotNamesGetter); @@ -810,7 +810,7 @@ private static IDataView AppendPerInstanceDataViews(IHostEnvironment env, string // Make sure there are no variable size vector columns. 
// This is a dictionary from the column name to its vector size. var vectorSizes = new Dictionary(); - var firstDvSlotNames = new Dictionary>(); + var firstDvSlotNames = new Dictionary>>(); ColumnType labelColKeyValuesType = null; var firstDvKeyWithNamesColumns = new List(); var firstDvKeyNoNamesColumns = new Dictionary(); @@ -840,7 +840,7 @@ private static IDataView AppendPerInstanceDataViews(IHostEnvironment env, string // Store the slot names of the 1st idv and use them as baseline. if (dv.Schema.HasSlotNames(i, type.VectorSize)) { - VBuffer slotNames = default(VBuffer); + VBuffer> slotNames = default; dv.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, i, ref slotNames); firstDvSlotNames.Add(name, slotNames); } @@ -849,7 +849,7 @@ private static IDataView AppendPerInstanceDataViews(IHostEnvironment env, string int cachedSize; if (vectorSizes.TryGetValue(name, out cachedSize)) { - VBuffer slotNames; + VBuffer> slotNames; // In the event that no slot names were recorded here, then slotNames will be // the default, length 0 vector. firstDvSlotNames.TryGetValue(name, out slotNames); @@ -949,7 +949,7 @@ private static IEnumerable FindHiddenColumns(ISchema schema, string colName } private static bool VerifyVectorColumnsMatch(int cachedSize, int col, IDataView dv, - ColumnType type, ref VBuffer firstDvSlotNames) + ColumnType type, ref VBuffer> firstDvSlotNames) { if (cachedSize != type.VectorSize) return false; @@ -958,7 +958,7 @@ private static bool VerifyVectorColumnsMatch(int cachedSize, int col, IDataView if (dv.Schema.HasSlotNames(col, type.VectorSize)) { // Verify that slots match with slots from 1st idv. 
- VBuffer currSlotNames = default(VBuffer); + VBuffer> currSlotNames = default; dv.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, col, ref currSlotNames); if (currSlotNames.Length != firstDvSlotNames.Length) @@ -967,7 +967,7 @@ private static bool VerifyVectorColumnsMatch(int cachedSize, int col, IDataView { var result = true; VBufferUtils.ForEachEitherDefined(ref currSlotNames, ref firstDvSlotNames, - (slot, val1, val2) => result = result && DvText.Identical(val1, val2)); + (slot, val1, val2) => result = result && val1.Span.SequenceEqual(val2.Span)); return result; } } @@ -995,7 +995,7 @@ private static List GetMetricNames(IChannel ch, ISchema schema, IRow row // Get the names of the metrics. For R8 valued columns the metric name is the column name. For R8 vector valued columns // the names of the metrics are the column name, followed by the slot name if it exists, or "Label_i" if it doesn't. - VBuffer names = default(VBuffer); + VBuffer> names = default; int metricCount = 0; var metricNames = new List(); for (int i = 0; i < schema.ColumnCount; i++) @@ -1028,10 +1028,10 @@ private static List GetMetricNames(IChannel ch, ISchema schema, IRow row { var namesArray = names.Values; if (Utils.Size(namesArray) < type.VectorSize) - namesArray = new DvText[type.VectorSize]; + namesArray = new ReadOnlyMemory[type.VectorSize]; for (int j = 0; j < type.VectorSize; j++) - namesArray[j] = new DvText(string.Format("Label_{0}", j)); - names = new VBuffer(type.VectorSize, namesArray); + namesArray[j] = string.Format("Label_{0}", j).AsMemory(); + names = new VBuffer>(type.VectorSize, namesArray); } foreach (var name in names.Items(all: true)) metricNames.Add(string.Format("{0}{1}", metricName, name.Value)); @@ -1095,12 +1095,12 @@ internal static AggregatedMetric[] ComputeMetricsSum(IHostEnvironment env, IData AggregatedMetric[] agg; using (var cursor = data.GetRowCursor(col => true)) { - DvBool isWeighted = DvBool.False; - ValueGetter isWeightedGetter; + bool isWeighted = 
false; + ValueGetter isWeightedGetter; if (hasWeighted) - isWeightedGetter = cursor.GetGetter(isWeightedCol); + isWeightedGetter = cursor.GetGetter(isWeightedCol); else - isWeightedGetter = (ref DvBool dst) => dst = DvBool.False; + isWeightedGetter = (ref bool dst) => dst = false; ValueGetter stratColGetter; if (hasStrats) @@ -1138,7 +1138,7 @@ internal static AggregatedMetric[] ComputeMetricsSum(IHostEnvironment env, IData continue; isWeightedGetter(ref isWeighted); - if (isWeighted.IsTrue) + if (isWeighted) { // If !average, we should have only one relevant row. if (numWeightedResults > numFolds) @@ -1233,8 +1233,8 @@ internal static IDataView GetAverageToDataView(IHostEnvironment env, ISchema sch MetricKinds.ColumnNames.StratCol); } - ValueGetter> getKeyValues = - (ref VBuffer dst) => + ValueGetter>> getKeyValues = + (ref VBuffer> dst) => { schema.GetMetadata(MetadataUtils.Kinds.KeyValues, stratCol, ref dst); Contracts.Assert(dst.IsDense); @@ -1246,19 +1246,20 @@ internal static IDataView GetAverageToDataView(IHostEnvironment env, ISchema sch } else if (i == stratVal) { - var stratVals = foldCol >= 0 ? new[] { DvText.NA, DvText.NA } : new[] { DvText.NA }; + //REVIEW: Not sure if empty string makes sense here. + var stratVals = foldCol >= 0 ? new[] { "".AsMemory(), "".AsMemory() } : new[] { "".AsMemory() }; dvBldr.AddColumn(MetricKinds.ColumnNames.StratVal, TextType.Instance, stratVals); weightedDvBldr?.AddColumn(MetricKinds.ColumnNames.StratVal, TextType.Instance, stratVals); } else if (i == isWeightedCol) { env.AssertValue(weightedDvBldr); - dvBldr.AddColumn(MetricKinds.ColumnNames.IsWeighted, BoolType.Instance, foldCol >= 0 ? new[] { DvBool.False, DvBool.False } : new[] { DvBool.False }); - weightedDvBldr.AddColumn(MetricKinds.ColumnNames.IsWeighted, BoolType.Instance, foldCol >= 0 ? new[] { DvBool.True, DvBool.True } : new[] { DvBool.True }); + dvBldr.AddColumn(MetricKinds.ColumnNames.IsWeighted, BoolType.Instance, foldCol >= 0 ? 
new[] { false, false} : new[] { false }); + weightedDvBldr.AddColumn(MetricKinds.ColumnNames.IsWeighted, BoolType.Instance, foldCol >= 0 ? new[] { true, true } : new[] { true }); } else if (i == foldCol) { - var foldVals = new[] { new DvText("Average"), new DvText("Standard Deviation") }; + var foldVals = new[] { "Average".AsMemory(), "Standard Deviation".AsMemory() }; dvBldr.AddColumn(MetricKinds.ColumnNames.FoldIndex, TextType.Instance, foldVals); weightedDvBldr?.AddColumn(MetricKinds.ColumnNames.FoldIndex, TextType.Instance, foldVals); } @@ -1297,11 +1298,11 @@ private static void AddVectorColumn(this ArrayDataViewBuilder dvBldr, IHostEnvir for (int j = 0; j < vectorStdevMetrics.Length; j++) vectorStdevMetrics[j] = Math.Sqrt(agg[iMetric + j].SumSq / numFolds - vectorMetrics[j] * vectorMetrics[j]); } - var names = new DvText[type.VectorSize]; + var names = new ReadOnlyMemory[type.VectorSize]; for (int j = 0; j < names.Length; j++) - names[j] = new DvText(agg[iMetric + j].Name); - var slotNames = new VBuffer(type.VectorSize, names); - ValueGetter> getSlotNames = (ref VBuffer dst) => dst = slotNames; + names[j] = agg[iMetric + j].Name.AsMemory(); + var slotNames = new VBuffer>(type.VectorSize, names); + ValueGetter>> getSlotNames = (ref VBuffer> dst) => dst = slotNames; if (vectorStdevMetrics != null) { env.AssertValue(vectorStdevMetrics); @@ -1356,7 +1357,7 @@ public static string GetConfusionTable(IHost host, IDataView confusionDataView, var type = confusionDataView.Schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, countCol); host.Check(type != null && type.IsKnownSizeVector && type.ItemType.IsText, "The Count column does not have a text vector metadata of kind SlotNames."); - var labelNames = default(VBuffer); + var labelNames = default(VBuffer>); confusionDataView.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, countCol, ref labelNames); host.Check(labelNames.IsDense, "Slot names vector must be dense"); @@ -1477,7 +1478,7 @@ private static 
double[][] GetConfusionTableAsArray(IDataView confusionDataView, /// stratified metrics, it must contain two text columns named "StratCol" and "StratVal", containing the stratification column /// name, and a text description of the value. In this case, the value of column StratVal in the row corresponding to the entire /// dataset should contain the text "overall", and the value of column StratCol should be DvText.NA. If weighted metrics are present - /// then the data view should also contain a DvBool column named "IsWeighted". + /// then the data view should also contain a bool column named "IsWeighted". /// If the IsWeighted column exists, this is assigned the string representation of the weighted /// metrics. Otherwise it is assigned null. public static string GetPerFoldResults(IHostEnvironment env, IDataView fold, out string weightedMetrics) @@ -1507,7 +1508,7 @@ private static string GetOverallMetricsAsString(double[] sumMetrics, double[] su // This method returns a string representation of a set of metrics. If there are stratification columns, it looks for columns named // StratCol and StratVal, and outputs the metrics in the rows with NA in the StratCol column. If weighted is true, it looks - // for a DvBool column named "IsWeighted" and outputs the metrics in the rows with a value of true in that column. + // for a bool column named "IsWeighted" and outputs the metrics in the rows with a value of true in that column. // If nonAveragedCols is non-null, it computes the average and standard deviation over all the relevant rows and populates // nonAveragedCols with columns that are either hidden, or are not of a type that we can display (i.e., either a numeric column, // or a known length vector of doubles). @@ -1536,7 +1537,7 @@ private static string GetFoldMetricsAsString(IHostEnvironment env, IDataView dat // Get a string representation of a confusion table. 
private static string GetConfusionTableAsString(double[][] confusionTable, double[] rowSums, double[] columnSums, - DvText[] predictedLabelNames, string prefix = "", bool sampled = false, bool binary = true) + ReadOnlyMemory[] predictedLabelNames, string prefix = "", bool sampled = false, bool binary = true) { int numLabels = Utils.Size(confusionTable); @@ -1687,8 +1688,8 @@ public static void PrintWarnings(IChannel ch, Dictionary metr { using (var cursor = warnings.GetRowCursor(c => c == col)) { - var warning = default(DvText); - var getter = cursor.GetGetter(col); + var warning = default(ReadOnlyMemory); + var getter = cursor.GetGetter>(col); while (cursor.MoveNext()) { getter(ref warning); diff --git a/src/Microsoft.ML.Data/Evaluators/MultiOutputRegressionEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MultiOutputRegressionEvaluator.cs index d57835b168..84fd29ea50 100644 --- a/src/Microsoft.ML.Data/Evaluators/MultiOutputRegressionEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/MultiOutputRegressionEvaluator.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
@@ -94,11 +94,11 @@ public override IEnumerable GetOverallMetricColumns() } protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, AggregatorDictionaryBase[] dictionaries, - out Action addAgg, out Func> consolidate) + out Action, Aggregator> addAgg, out Func> consolidate) { var stratCol = new List(); - var stratVal = new List(); - var isWeighted = new List(); + var stratVal = new List>(); + var isWeighted = new List(); var l1 = new List(); var l2 = new List(); var dist = new List(); @@ -117,7 +117,7 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A stratCol.Add(stratColKey); stratVal.Add(stratColVal); - isWeighted.Add(DvBool.False); + isWeighted.Add(false); l1.Add(agg.UnweightedCounters.L1); l2.Add(agg.UnweightedCounters.L2); dist.Add(agg.UnweightedCounters.Dist); @@ -129,7 +129,7 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A { stratCol.Add(stratColKey); stratVal.Add(stratColVal); - isWeighted.Add(DvBool.True); + isWeighted.Add(true); l1.Add(agg.WeightedCounters.L1); l2.Add(agg.WeightedCounters.L2); dist.Add(agg.WeightedCounters.Dist); @@ -361,15 +361,15 @@ public override void ProcessRow() WeightedCounters.Update(score, label, _size, weight); } - public void GetSlotNames(ref VBuffer slotNames) + public void GetSlotNames(ref VBuffer> slotNames) { var values = slotNames.Values; if (Utils.Size(values) < _size) - values = new DvText[_size]; + values = new ReadOnlyMemory[_size]; for (int i = 0; i < _size; i++) - values[i] = new DvText(string.Format("(Label_{0})", i)); - slotNames = new VBuffer(_size, values); + values[i] = string.Format("(Label_{0})", i).AsMemory(); + slotNames = new VBuffer>(_size, values); } } } @@ -555,7 +555,7 @@ private void CheckInputColumnTypes(ISchema schema, out ColumnType labelType, out labelType = new VectorType(t.ItemType.AsPrimitive, t.VectorSize); var slotNamesType = new VectorType(TextType.Instance, t.VectorSize); labelMetadata = new 
ColumnMetadataInfo(LabelCol); - labelMetadata.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>(slotNamesType, + labelMetadata.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>>(slotNamesType, CreateSlotNamesGetter(schema, LabelIndex, labelType.VectorSize, "True"))); t = schema.GetColumnType(ScoreIndex); @@ -563,10 +563,10 @@ private void CheckInputColumnTypes(ISchema schema, out ColumnType labelType, out throw Host.Except("Score column '{0}' has type '{1}' but must be a known length vector of type R4", ScoreCol, t); scoreType = new VectorType(t.ItemType.AsPrimitive, t.VectorSize); scoreMetadata = new ColumnMetadataInfo(ScoreCol); - scoreMetadata.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>(slotNamesType, + scoreMetadata.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>>(slotNamesType, CreateSlotNamesGetter(schema, ScoreIndex, scoreType.VectorSize, "Predicted"))); - scoreMetadata.Add(MetadataUtils.Kinds.ScoreColumnKind, new MetadataInfo(TextType.Instance, GetScoreColumnKind)); - scoreMetadata.Add(MetadataUtils.Kinds.ScoreValueKind, new MetadataInfo(TextType.Instance, GetScoreValueKind)); + scoreMetadata.Add(MetadataUtils.Kinds.ScoreColumnKind, new MetadataInfo>(TextType.Instance, GetScoreColumnKind)); + scoreMetadata.Add(MetadataUtils.Kinds.ScoreValueKind, new MetadataInfo>(TextType.Instance, GetScoreValueKind)); scoreMetadata.Add(MetadataUtils.Kinds.ScoreColumnSetId, new MetadataInfo(MetadataUtils.ScoreColumnSetIdType, GetScoreColumnSetId(schema))); } @@ -580,33 +580,33 @@ private MetadataUtils.MetadataGetter GetScoreColumnSetId(ISchema schema) (int col, ref uint dst) => dst = id; } - private void GetScoreColumnKind(int col, ref DvText dst) + private void GetScoreColumnKind(int col, ref ReadOnlyMemory dst) { - dst = new DvText(MetadataUtils.Const.ScoreColumnKind.MultiOutputRegression); + dst = MetadataUtils.Const.ScoreColumnKind.MultiOutputRegression.AsMemory(); } - private void GetScoreValueKind(int col, ref DvText dst) + private void 
GetScoreValueKind(int col, ref ReadOnlyMemory dst) { - dst = new DvText(MetadataUtils.Const.ScoreValueKind.Score); + dst = MetadataUtils.Const.ScoreValueKind.Score.AsMemory(); } - private MetadataUtils.MetadataGetter> CreateSlotNamesGetter(ISchema schema, int column, int length, string prefix) + private MetadataUtils.MetadataGetter>> CreateSlotNamesGetter(ISchema schema, int column, int length, string prefix) { var type = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, column); if (type != null && type.IsText) { return - (int col, ref VBuffer dst) => schema.GetMetadata(MetadataUtils.Kinds.SlotNames, column, ref dst); + (int col, ref VBuffer> dst) => schema.GetMetadata(MetadataUtils.Kinds.SlotNames, column, ref dst); } return - (int col, ref VBuffer dst) => + (int col, ref VBuffer> dst) => { var values = dst.Values; if (Utils.Size(values) < length) - values = new DvText[length]; + values = new ReadOnlyMemory[length]; for (int i = 0; i < length; i++) - values[i] = new DvText(string.Format("{0}_{1}", prefix, i)); - dst = new VBuffer(length, values); + values[i] = string.Format("{0}_{1}", prefix, i).AsMemory(); + dst = new VBuffer>(length, values); }; } } @@ -680,12 +680,12 @@ protected override void PrintFoldResultsCore(IChannel ch, Dictionary true)) { - DvBool isWeighted = DvBool.False; - ValueGetter isWeightedGetter; + bool isWeighted = false; + ValueGetter isWeightedGetter; if (needWeighted) - isWeightedGetter = cursor.GetGetter(isWeightedCol); + isWeightedGetter = cursor.GetGetter(isWeightedCol); else - isWeightedGetter = (ref DvBool dst) => dst = DvBool.False; + isWeightedGetter = (ref bool dst) => dst = false; ValueGetter stratGetter; if (hasStrats) @@ -715,9 +715,9 @@ protected override void PrintFoldResultsCore(IChannel ch, Dictionary[labelCount]; for (int j = 0; j < labelCount; j++) - labelNames[j] = new DvText(string.Format("Label_{0}", j)); + labelNames[j] = string.Format("Label_{0}", j).AsMemory(); var sb = new StringBuilder(); 
sb.AppendLine("Per-label metrics:"); @@ -733,12 +733,12 @@ protected override void PrintFoldResultsCore(IChannel ch, Dictionary[] GetClassNames(RoleMappedSchema schema) { - DvText[] names; + ReadOnlyMemory[] names; // Get the label names from the score column if they exist, or use the default names. var scoreInfo = schema.GetUniqueColumn(MetadataUtils.Const.ScoreValueKind.Score); var mdType = schema.Schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, scoreInfo.Index); - var labelNames = default(VBuffer); + var labelNames = default(VBuffer>); if (mdType != null && mdType.IsKnownSizeVector && mdType.ItemType.IsText) { schema.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, scoreInfo.Index, ref labelNames); - names = new DvText[labelNames.Length]; + names = new ReadOnlyMemory[labelNames.Length]; labelNames.CopyTo(names); } else @@ -111,7 +111,7 @@ private DvText[] GetClassNames(RoleMappedSchema schema) Host.Assert(Utils.Size(score) == 1); Host.Assert(score[0].Type.VectorSize > 0); int numClasses = score[0].Type.VectorSize; - names = Enumerable.Range(0, numClasses).Select(i => new DvText(i.ToString())).ToArray(); + names = Enumerable.Range(0, numClasses).Select(i => i.ToString().AsMemory()).ToArray(); } return names; } @@ -137,23 +137,21 @@ public override IEnumerable GetOverallMetricColumns() } protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, AggregatorDictionaryBase[] dictionaries, - out Action addAgg, out Func> consolidate) + out Action, Aggregator> addAgg, out Func> consolidate) { var stratCol = new List(); - var stratVal = new List(); - var isWeighted = new List(); - + var stratVal = new List>(); + var isWeighted = new List(); var microAcc = new List(); var macroAcc = new List(); var logLoss = new List(); var logLossRed = new List(); var topKAcc = new List(); var perClassLogLoss = new List(); - var counts = new List(); var weights = new List(); var confStratCol = new List(); - var confStratVal = new List(); + var confStratVal 
= new List>(); bool hasStrats = Utils.Size(dictionaries) > 0; bool hasWeight = aggregator.Weighted; @@ -167,7 +165,7 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A stratCol.Add(stratColKey); stratVal.Add(stratColVal); - isWeighted.Add(DvBool.False); + isWeighted.Add(false); microAcc.Add(agg.UnweightedCounters.MicroAvgAccuracy); macroAcc.Add(agg.UnweightedCounters.MacroAvgAccuracy); logLoss.Add(agg.UnweightedCounters.LogLoss); @@ -184,7 +182,7 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A { stratCol.Add(stratColKey); stratVal.Add(stratColVal); - isWeighted.Add(DvBool.True); + isWeighted.Add(true); microAcc.Add(agg.WeightedCounters.MicroAvgAccuracy); macroAcc.Add(agg.WeightedCounters.MacroAvgAccuracy); logLoss.Add(agg.WeightedCounters.LogLoss); @@ -221,9 +219,9 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A confDvBldr.AddColumn(MetricKinds.ColumnNames.StratCol, GetKeyValueGetter(dictionaries), 0, dictionaries.Length, confStratCol.ToArray()); confDvBldr.AddColumn(MetricKinds.ColumnNames.StratVal, TextType.Instance, confStratVal.ToArray()); } - ValueGetter> getSlotNames = - (ref VBuffer dst) => - dst = new VBuffer(aggregator.ClassNames.Length, aggregator.ClassNames); + ValueGetter>> getSlotNames = + (ref VBuffer> dst) => + dst = new VBuffer>(aggregator.ClassNames.Length, aggregator.ClassNames); confDvBldr.AddColumn(MetricKinds.ColumnNames.Count, getSlotNames, NumberType.R8, counts.ToArray()); if (hasWeight) @@ -372,9 +370,9 @@ public void Update(int[] indices, double loglossCurr, int label, float weight) private long _numUnknownClassInstances; private long _numNegOrNonIntegerLabels; - public readonly DvText[] ClassNames; + public readonly ReadOnlyMemory[] ClassNames; - public Aggregator(IHostEnvironment env, DvText[] classNames, int scoreVectorSize, bool weighted, int? 
outputTopKAcc, string stratName) + public Aggregator(IHostEnvironment env, ReadOnlyMemory[] classNames, int scoreVectorSize, bool weighted, int? outputTopKAcc, string stratName) : base(env, stratName) { Host.Assert(outputTopKAcc == null || outputTopKAcc > 0); @@ -488,15 +486,15 @@ protected override List GetWarningsCore() return warnings; } - public void GetSlotNames(ref VBuffer slotNames) + public void GetSlotNames(ref VBuffer> slotNames) { var values = slotNames.Values; if (Utils.Size(values) < ClassNames.Length) - values = new DvText[ClassNames.Length]; + values = new ReadOnlyMemory[ClassNames.Length]; for (int i = 0; i < ClassNames.Length; i++) - values[i] = new DvText(string.Format("(class {0})", ClassNames[i])); - slotNames = new VBuffer(ClassNames.Length, values); + values[i] = string.Format("(class {0})", ClassNames[i]).AsMemory(); + slotNames = new VBuffer>(ClassNames.Length, values); } } @@ -691,7 +689,7 @@ private static VersionInfo GetVersionInfo() private const float Epsilon = (float)1e-15; private readonly int _numClasses; - private readonly DvText[] _classNames; + private readonly ReadOnlyMemory[] _classNames; private readonly ColumnType[] _types; public MultiClassPerInstanceEvaluator(IHostEnvironment env, ISchema schema, ColumnInfo scoreInfo, string labelCol) @@ -704,13 +702,13 @@ public MultiClassPerInstanceEvaluator(IHostEnvironment env, ISchema schema, Colu if (schema.HasSlotNames(ScoreIndex, _numClasses)) { - var classNames = default(VBuffer); + var classNames = default(VBuffer>); schema.GetMetadata(MetadataUtils.Kinds.SlotNames, ScoreIndex, ref classNames); - _classNames = new DvText[_numClasses]; + _classNames = new ReadOnlyMemory[_numClasses]; classNames.CopyTo(_classNames); } else - _classNames = Utils.BuildArray(_numClasses, i => new DvText(i.ToString())); + _classNames = Utils.BuildArray(_numClasses, i => i.ToString().AsMemory()); var key = new KeyType(DataKind.U4, 0, _numClasses); _types[AssignedCol] = key; @@ -733,12 +731,12 @@ private 
MultiClassPerInstanceEvaluator(IHostEnvironment env, ModelLoadContext ct Host.CheckDecode(_numClasses > 0); if (ctx.Header.ModelVerWritten > VerInitial) { - _classNames = new DvText[_numClasses]; + _classNames = new ReadOnlyMemory[_numClasses]; for (int i = 0; i < _numClasses; i++) - _classNames[i] = new DvText(ctx.LoadNonEmptyString()); + _classNames[i] = ctx.LoadNonEmptyString().AsMemory(); } else - _classNames = Utils.BuildArray(_numClasses, i => new DvText(i.ToString())); + _classNames = Utils.BuildArray(_numClasses, i => i.ToString().AsMemory()); _types = new ColumnType[4]; var key = new KeyType(DataKind.U4, 0, _numClasses); @@ -898,19 +896,19 @@ public override RowMapperColumnInfo[] GetOutputColumns() var assignedColKeyValues = new ColumnMetadataInfo(Assigned); var keyValueType = new VectorType(TextType.Instance, _numClasses); - assignedColKeyValues.Add(MetadataUtils.Kinds.KeyValues, new MetadataInfo>(keyValueType, CreateKeyValueGetter())); + assignedColKeyValues.Add(MetadataUtils.Kinds.KeyValues, new MetadataInfo>>(keyValueType, CreateKeyValueGetter())); infos[AssignedCol] = new RowMapperColumnInfo(Assigned, _types[AssignedCol], assignedColKeyValues); infos[LogLossCol] = new RowMapperColumnInfo(LogLoss, _types[LogLossCol], null); var slotNamesType = new VectorType(TextType.Instance, _numClasses); var sortedScores = new ColumnMetadataInfo(SortedScores); - sortedScores.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>(slotNamesType, + sortedScores.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>>(slotNamesType, CreateSlotNamesGetter(_numClasses, "Score"))); var sortedClasses = new ColumnMetadataInfo(SortedClasses); - sortedClasses.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>(slotNamesType, + sortedClasses.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>>(slotNamesType, CreateSlotNamesGetter(_numClasses, "Class"))); - sortedClasses.Add(MetadataUtils.Kinds.KeyValues, new MetadataInfo>(keyValueType, CreateKeyValueGetter())); + 
sortedClasses.Add(MetadataUtils.Kinds.KeyValues, new MetadataInfo>>(keyValueType, CreateKeyValueGetter())); infos[SortedScoresCol] = new RowMapperColumnInfo(SortedScores, _types[SortedScoresCol], sortedScores); infos[SortedClassesCol] = new RowMapperColumnInfo(SortedClasses, _types[SortedClassesCol], sortedClasses); @@ -918,31 +916,31 @@ public override RowMapperColumnInfo[] GetOutputColumns() } // REVIEW: Figure out how to avoid having the column name in each slot name. - private MetadataUtils.MetadataGetter> CreateSlotNamesGetter(int numTopClasses, string suffix) + private MetadataUtils.MetadataGetter>> CreateSlotNamesGetter(int numTopClasses, string suffix) { return - (int col, ref VBuffer dst) => + (int col, ref VBuffer> dst) => { var values = dst.Values; if (Utils.Size(values) < numTopClasses) - values = new DvText[numTopClasses]; + values = new ReadOnlyMemory[numTopClasses]; for (int i = 1; i <= numTopClasses; i++) - values[i - 1] = new DvText(string.Format("#{0} {1}", i, suffix)); - dst = new VBuffer(numTopClasses, values); + values[i - 1] = string.Format("#{0} {1}", i, suffix).AsMemory(); + dst = new VBuffer>(numTopClasses, values); }; } - private MetadataUtils.MetadataGetter> CreateKeyValueGetter() + private MetadataUtils.MetadataGetter>> CreateKeyValueGetter() { return - (int col, ref VBuffer dst) => + (int col, ref VBuffer> dst) => { var values = dst.Values; if (Utils.Size(values) < _numClasses) - values = new DvText[_numClasses]; + values = new ReadOnlyMemory[_numClasses]; for (int i = 0; i < _numClasses; i++) values[i] = _classNames[i]; - dst = new VBuffer(_numClasses, values); + dst = new VBuffer>(_numClasses, values); }; } @@ -1149,9 +1147,9 @@ protected override IDataView GetPerInstanceMetricsCore(IDataView perInst, RoleMa var labelType = perInst.Schema.GetColumnType(labelCol); if (labelType.IsKey && (!perInst.Schema.HasKeyNames(labelCol, labelType.KeyCount) || labelType.RawKind != DataKind.U4)) { - perInst = LambdaColumnMapper.Create(Host, 
"ConvertToLong", perInst, schema.Label.Name, - schema.Label.Name, perInst.Schema.GetColumnType(labelCol), NumberType.I8, - (ref uint src, ref DvInt8 dst) => dst = src == 0 ? DvInt8.NA : src - 1 + (long)labelType.AsKey.Min); + perInst = LambdaColumnMapper.Create(Host, "ConvertToDouble", perInst, schema.Label.Name, + schema.Label.Name, perInst.Schema.GetColumnType(labelCol), NumberType.R8, + (ref uint src, ref double dst) => dst = src == 0 ? double.NaN : src - 1 + (double)labelType.AsKey.Min); } var perInstSchema = perInst.Schema; diff --git a/src/Microsoft.ML.Data/Evaluators/QuantileRegressionEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/QuantileRegressionEvaluator.cs index fb8d9c1249..d4c70c6eac 100644 --- a/src/Microsoft.ML.Data/Evaluators/QuantileRegressionEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/QuantileRegressionEvaluator.cs @@ -46,7 +46,7 @@ protected override IRowMapper CreatePerInstanceRowMapper(RoleMappedSchema schema int scoreSize = scoreInfo.Type.VectorSize; var type = schema.Schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, scoreInfo.Index); Host.Check(type != null && type.IsKnownSizeVector && type.ItemType.IsText, "Quantile regression score column must have slot names"); - var quantiles = default(VBuffer); + var quantiles = default(VBuffer>); schema.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, scoreInfo.Index, ref quantiles); Host.Assert(quantiles.IsDense && quantiles.Length == scoreSize); @@ -73,7 +73,7 @@ protected override Aggregator GetAggregatorCore(RoleMappedSchema schema, string var scoreInfo = schema.GetUniqueColumn(MetadataUtils.Const.ScoreValueKind.Score); var t = scoreInfo.Type; Host.Assert(t.VectorSize > 0 && (t.ItemType == NumberType.R4 || t.ItemType == NumberType.R8)); - var slotNames = default(VBuffer); + var slotNames = default(VBuffer>); t = schema.Schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, scoreInfo.Index); if (t != null && t.VectorSize == scoreInfo.Type.VectorSize && t.ItemType.IsText) 
schema.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, scoreInfo.Index, ref slotNames); @@ -205,14 +205,14 @@ protected override VBuffer Zero() private readonly Counters _counters; private readonly Counters _weightedCounters; - private VBuffer _slotNames; + private VBuffer> _slotNames; public override CountersBase UnweightedCounters { get { return _counters; } } public override CountersBase WeightedCounters { get { return _weightedCounters; } } public Aggregator(IHostEnvironment env, IRegressionLoss lossFunction, bool weighted, int size, - ref VBuffer slotNames, string stratName) + ref VBuffer> slotNames, string stratName) : base(env, lossFunction, weighted, stratName) { Host.Assert(size > 0); @@ -242,8 +242,8 @@ public override void AddColumn(ArrayDataViewBuilder dvBldr, string metricName, p Host.AssertValue(dvBldr); if (_slotNames.Length > 0) { - ValueGetter> getSlotNames = - (ref VBuffer dst) => dst = _slotNames; + ValueGetter>> getSlotNames = + (ref VBuffer> dst) => dst = _slotNames; dvBldr.AddColumn(metricName, getSlotNames, NumberType.R8, metric); } else @@ -272,10 +272,10 @@ private static VersionInfo GetVersionInfo() public const string L2 = "L2-loss"; private readonly int _scoreSize; - private readonly DvText[] _quantiles; + private readonly ReadOnlyMemory[] _quantiles; private readonly ColumnType _outputType; - public QuantileRegressionPerInstanceEvaluator(IHostEnvironment env, ISchema schema, string scoreCol, string labelCol, int scoreSize, DvText[] quantiles) + public QuantileRegressionPerInstanceEvaluator(IHostEnvironment env, ISchema schema, string scoreCol, string labelCol, int scoreSize, ReadOnlyMemory[] quantiles) : base(env, schema, scoreCol, labelCol) { Host.CheckParam(scoreSize > 0, nameof(scoreSize), "must be greater than 0"); @@ -299,9 +299,9 @@ private QuantileRegressionPerInstanceEvaluator(IHostEnvironment env, ModelLoadCo _scoreSize = ctx.Reader.ReadInt32(); Host.CheckDecode(_scoreSize > 0); - _quantiles = new DvText[_scoreSize]; + 
_quantiles = new ReadOnlyMemory[_scoreSize]; for (int i = 0; i < _scoreSize; i++) - _quantiles[i] = new DvText(ctx.LoadNonEmptyString()); + _quantiles[i] = ctx.LoadNonEmptyString().AsMemory(); _outputType = new VectorType(NumberType.R8, _scoreSize); } @@ -344,26 +344,26 @@ public override RowMapperColumnInfo[] GetOutputColumns() var slotNamesType = new VectorType(TextType.Instance, _scoreSize); var l1Metadata = new ColumnMetadataInfo(L1); - l1Metadata.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>(slotNamesType, CreateSlotNamesGetter(L1))); + l1Metadata.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>>(slotNamesType, CreateSlotNamesGetter(L1))); var l2Metadata = new ColumnMetadataInfo(L2); - l2Metadata.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>(slotNamesType, CreateSlotNamesGetter(L2))); + l2Metadata.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>>(slotNamesType, CreateSlotNamesGetter(L2))); infos[L1Col] = new RowMapperColumnInfo(L1, _outputType, l1Metadata); infos[L2Col] = new RowMapperColumnInfo(L2, _outputType, l2Metadata); return infos; } - private MetadataUtils.MetadataGetter> CreateSlotNamesGetter(string prefix) + private MetadataUtils.MetadataGetter>> CreateSlotNamesGetter(string prefix) { return - (int col, ref VBuffer dst) => + (int col, ref VBuffer> dst) => { var values = dst.Values; if (Utils.Size(values) < _scoreSize) - values = new DvText[_scoreSize]; + values = new ReadOnlyMemory[_scoreSize]; for (int i = 0; i < _scoreSize; i++) - values[i] = new DvText(string.Format("{0} ({1})", prefix, _quantiles[i])); - dst = new VBuffer(_scoreSize, values); + values[i] = string.Format("{0} ({1})", prefix, _quantiles[i]).AsMemory(); + dst = new VBuffer>(_scoreSize, values); }; } diff --git a/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs index 616cff8394..131f16b058 100644 --- a/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs +++ 
b/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. @@ -147,20 +147,20 @@ public override IEnumerable GetOverallMetricColumns() } protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, AggregatorDictionaryBase[] dictionaries, - out Action addAgg, out Func> consolidate) + out Action, Aggregator> addAgg, out Func> consolidate) { var stratCol = new List(); - var stratVal = new List(); - var isWeighted = new List(); + var stratVal = new List>(); + var isWeighted = new List(); var ndcg = new List(); var dcg = new List(); - var groupName = new List(); + var groupName = new List>(); var groupNdcg = new List(); var groupDcg = new List(); var groupMaxDcg = new List(); var groupStratCol = new List(); - var groupStratVal = new List(); + var groupStratVal = new List>(); bool hasStrats = Utils.Size(dictionaries) > 0; bool hasWeight = aggregator.Weighted; @@ -175,14 +175,14 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A stratCol.Add(stratColKey); stratVal.Add(stratColVal); - isWeighted.Add(DvBool.False); + isWeighted.Add(false); ndcg.Add(agg.UnweightedCounters.Ndcg); dcg.Add(agg.UnweightedCounters.Dcg); if (agg.UnweightedCounters.GroupSummary) { groupStratCol.AddRange(agg.UnweightedCounters.GroupDcg.Select(x => stratColKey)); groupStratVal.AddRange(agg.UnweightedCounters.GroupDcg.Select(x => stratColVal)); - groupName.AddRange(agg.GroupId.Select(sb => new DvText(sb.ToString()))); + groupName.AddRange(agg.GroupId.Select(sb => sb.ToString().AsMemory())); groupNdcg.AddRange(agg.UnweightedCounters.GroupNdcg); groupDcg.AddRange(agg.UnweightedCounters.GroupDcg); groupMaxDcg.AddRange(agg.UnweightedCounters.GroupMaxDcg); @@ -192,7 +192,7 @@ 
protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A { stratCol.Add(stratColKey); stratVal.Add(stratColVal); - isWeighted.Add(DvBool.True); + isWeighted.Add(true); ndcg.Add(agg.WeightedCounters.Ndcg); dcg.Add(agg.WeightedCounters.Dcg); } @@ -386,7 +386,7 @@ public void UpdateGroup(Single weight) public readonly Counters UnweightedCounters; public readonly Counters WeightedCounters; public readonly bool Weighted; - public readonly List GroupId; + public readonly List> GroupId; private int _groupSize; public Aggregator(IHostEnvironment env, Double[] labelGains, int truncationLevel, bool groupSummary, bool weighted, string stratName) @@ -402,7 +402,7 @@ public Aggregator(IHostEnvironment env, Double[] labelGains, int truncationLevel _currentQueryWeight = Single.NaN; if (groupSummary) - GroupId = new List(); + GroupId = new List>(); } public override void InitializeNextPass(IRow row, RoleMappedSchema schema) @@ -472,7 +472,7 @@ private void ProcessGroup() if (WeightedCounters != null) WeightedCounters.UpdateGroup(_currentQueryWeight); if (GroupId != null) - GroupId.Add(new DvText(_groupSb.ToString())); + GroupId.Add(_groupSb.ToString().AsMemory()); _currentQueryWeight = Single.NaN; } @@ -483,30 +483,30 @@ protected override void FinishPassCore() ProcessGroup(); } - public ValueGetter> GetGroupSummarySlotNames(string prefix) + public ValueGetter>> GetGroupSummarySlotNames(string prefix) { return - (ref VBuffer dst) => + (ref VBuffer> dst) => { var values = dst.Values; if (Utils.Size(values) < UnweightedCounters.TruncationLevel) - values = new DvText[UnweightedCounters.TruncationLevel]; + values = new ReadOnlyMemory[UnweightedCounters.TruncationLevel]; for (int i = 0; i < UnweightedCounters.TruncationLevel; i++) - values[i] = new DvText(string.Format("{0}@{1}", prefix, i + 1)); - dst = new VBuffer(UnweightedCounters.TruncationLevel, values); + values[i] = string.Format("{0}@{1}", prefix, i + 1).AsMemory(); + dst = new 
VBuffer>(UnweightedCounters.TruncationLevel, values); }; } - public void GetSlotNames(ref VBuffer slotNames) + public void GetSlotNames(ref VBuffer> slotNames) { var values = slotNames.Values; if (Utils.Size(values) < UnweightedCounters.TruncationLevel) - values = new DvText[UnweightedCounters.TruncationLevel]; + values = new ReadOnlyMemory[UnweightedCounters.TruncationLevel]; for (int i = 0; i < UnweightedCounters.TruncationLevel; i++) - values[i] = new DvText(string.Format("@{0}", i + 1)); - slotNames = new VBuffer(UnweightedCounters.TruncationLevel, values); + values[i] = string.Format("@{0}", i + 1).AsMemory(); + slotNames = new VBuffer>(UnweightedCounters.TruncationLevel, values); } } } @@ -588,7 +588,7 @@ private sealed class Bindings : BindingsBase private readonly ColumnType _outputType; private readonly ColumnType _slotNamesType; private readonly int _truncationLevel; - private readonly MetadataUtils.MetadataGetter> _slotNamesGetter; + private readonly MetadataUtils.MetadataGetter>> _slotNamesGetter; public Bindings(IExceptionContext ectx, ISchema input, bool user, string labelCol, string scoreCol, string groupCol, int truncationLevel) @@ -633,17 +633,17 @@ protected override void GetMetadataCore(string kind, int iinfo, ref TVal base.GetMetadataCore(kind, iinfo, ref value); } - private void SlotNamesGetter(int iinfo, ref VBuffer dst) + private void SlotNamesGetter(int iinfo, ref VBuffer> dst) { Contracts.Assert(0 <= iinfo && iinfo < InfoCount); var values = dst.Values; if (Utils.Size(values) < _truncationLevel) - values = new DvText[_truncationLevel]; + values = new ReadOnlyMemory[_truncationLevel]; for (int i = 0; i < _truncationLevel; i++) values[i] = - new DvText(string.Format("{0}@{1}", iinfo == NdcgCol ? Ndcg : iinfo == DcgCol ? Dcg : MaxDcg, - i + 1)); - dst = new VBuffer(_truncationLevel, values); + string.Format("{0}@{1}", iinfo == NdcgCol ? Ndcg : iinfo == DcgCol ? 
Dcg : MaxDcg, + i + 1).AsMemory(); + dst = new VBuffer>(_truncationLevel, values); } } diff --git a/src/Microsoft.ML.Data/Evaluators/RegressionEvaluatorBase.cs b/src/Microsoft.ML.Data/Evaluators/RegressionEvaluatorBase.cs index 9962897373..c4f55ba2ba 100644 --- a/src/Microsoft.ML.Data/Evaluators/RegressionEvaluatorBase.cs +++ b/src/Microsoft.ML.Data/Evaluators/RegressionEvaluatorBase.cs @@ -43,11 +43,11 @@ protected RegressionEvaluatorBase(ArgumentsBase args, IHostEnvironment env, stri } protected override void GetAggregatorConsolidationFuncs(TAgg aggregator, AggregatorDictionaryBase[] dictionaries, - out Action addAgg, out Func> consolidate) + out Action, TAgg> addAgg, out Func> consolidate) { var stratCol = new List(); - var stratVal = new List(); - var isWeighted = new List(); + var stratVal = new List>(); + var isWeighted = new List(); var l1 = new List(); var l2 = new List(); var rms = new List(); @@ -64,7 +64,7 @@ protected override void GetAggregatorConsolidationFuncs(TAgg aggregator, Aggrega stratCol.Add(stratColKey); stratVal.Add(stratColVal); - isWeighted.Add(DvBool.False); + isWeighted.Add(false); l1.Add(agg.UnweightedCounters.L1); l2.Add(agg.UnweightedCounters.L2); rms.Add(agg.UnweightedCounters.Rms); @@ -74,7 +74,7 @@ protected override void GetAggregatorConsolidationFuncs(TAgg aggregator, Aggrega { stratCol.Add(stratColKey); stratVal.Add(stratColVal); - isWeighted.Add(DvBool.True); + isWeighted.Add(true); l1.Add(agg.WeightedCounters.L1); l2.Add(agg.WeightedCounters.L2); rms.Add(agg.WeightedCounters.Rms); diff --git a/src/Microsoft.ML.Data/Microsoft.ML.Data.csproj b/src/Microsoft.ML.Data/Microsoft.ML.Data.csproj index 125ee8f479..5f147c496c 100644 --- a/src/Microsoft.ML.Data/Microsoft.ML.Data.csproj +++ b/src/Microsoft.ML.Data/Microsoft.ML.Data.csproj @@ -1,4 +1,4 @@ - + netstandard2.0 @@ -18,7 +18,6 @@ - diff --git a/src/Microsoft.ML.Data/Model/ModelHeader.cs b/src/Microsoft.ML.Data/Model/ModelHeader.cs index 8f060cf86f..067ff12285 100644 --- 
a/src/Microsoft.ML.Data/Model/ModelHeader.cs +++ b/src/Microsoft.ML.Data/Model/ModelHeader.cs @@ -150,7 +150,7 @@ public static void EndWrite(BinaryWriter writer, long fpMin, ref ModelHeader hea Contracts.Assert(header.FpStringChars == header.FpStringTable + header.CbStringTable); foreach (var ns in pool) { - foreach (var ch in ns.Value) + foreach (var ch in ns.Value.Span) writer.Write((short)ch); } header.CbStringChars = writer.FpCur() - header.FpStringChars - fpMin; diff --git a/src/Microsoft.ML.Data/Model/ModelSaveContext.cs b/src/Microsoft.ML.Data/Model/ModelSaveContext.cs index 82c9c7c3cc..5e32893e7d 100644 --- a/src/Microsoft.ML.Data/Model/ModelSaveContext.cs +++ b/src/Microsoft.ML.Data/Model/ModelSaveContext.cs @@ -185,6 +185,11 @@ public void SaveString(string str) Writer.Write(Strings.Add(str).Id); } + public void SaveString(ReadOnlyMemory str) + { + Writer.Write(Strings.Add(str).Id); + } + /// /// Puts a string into the context pool, and writes the integer code of the string ID /// to the write stream. @@ -195,6 +200,11 @@ public void SaveNonEmptyString(string str) Writer.Write(Strings.Add(str).Id); } + public void SaveNonEmptyString(ReadOnlyMemory str) + { + Writer.Write(Strings.Add(str).Id); + } + /// /// Commit the save operation. This completes writing of the main stream. When in repository /// mode, it disposes the Writer (but not the repository). diff --git a/src/Microsoft.ML.Data/Model/Onnx/OnnxNode.cs b/src/Microsoft.ML.Data/Model/Onnx/OnnxNode.cs index 259a6d27d4..79df068b9b 100644 --- a/src/Microsoft.ML.Data/Model/Onnx/OnnxNode.cs +++ b/src/Microsoft.ML.Data/Model/Onnx/OnnxNode.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System; using System.Collections.Generic; using Microsoft.ML.Runtime.Data; @@ -17,14 +18,14 @@ public abstract class OnnxNode { public abstract void AddAttribute(string argName, double value); public abstract void AddAttribute(string argName, long value); - public abstract void AddAttribute(string argName, DvText value); + public abstract void AddAttribute(string argName, ReadOnlyMemory value); public abstract void AddAttribute(string argName, string value); public abstract void AddAttribute(string argName, bool value); public abstract void AddAttribute(string argName, IEnumerable value); public abstract void AddAttribute(string argName, IEnumerable value); public abstract void AddAttribute(string argName, IEnumerable value); - public abstract void AddAttribute(string argName, IEnumerable value); + public abstract void AddAttribute(string argName, IEnumerable> value); public abstract void AddAttribute(string argName, string[] value); public abstract void AddAttribute(string argName, IEnumerable value); public abstract void AddAttribute(string argName, IEnumerable value); diff --git a/src/Microsoft.ML.Data/Scorers/BinaryClassifierScorer.cs b/src/Microsoft.ML.Data/Scorers/BinaryClassifierScorer.cs index 6da402431d..ee76b5a674 100644 --- a/src/Microsoft.ML.Data/Scorers/BinaryClassifierScorer.cs +++ b/src/Microsoft.ML.Data/Scorers/BinaryClassifierScorer.cs @@ -250,8 +250,8 @@ protected override Delegate GetPredictedLabelGetter(IRow output, out Delegate sc return predFnAsKey; } - ValueGetter predFn = - (ref DvBool dst) => + ValueGetter predFn = + (ref bool dst) => { EnsureCachedPosition(ref cachedPosition, ref score, output, mapperScoreGetter); GetPredictedLabelCore(score, ref dst); @@ -259,9 +259,10 @@ protected override Delegate GetPredictedLabelGetter(IRow output, out Delegate sc return predFn; } - private void GetPredictedLabelCore(Float score, ref DvBool value) + private void GetPredictedLabelCore(Float score, ref bool value) { - value = score > _threshold ? 
DvBool.True : score <= _threshold ? DvBool.False : DvBool.NA; + //Behavior for NA values is undefined. + value = score > _threshold; } private void GetPredictedLabelCoreAsKey(Float score, ref uint value) diff --git a/src/Microsoft.ML.Data/Scorers/PredictedLabelScorerBase.cs b/src/Microsoft.ML.Data/Scorers/PredictedLabelScorerBase.cs index 2fd039897a..643c7cb3b2 100644 --- a/src/Microsoft.ML.Data/Scorers/PredictedLabelScorerBase.cs +++ b/src/Microsoft.ML.Data/Scorers/PredictedLabelScorerBase.cs @@ -39,8 +39,8 @@ protected sealed class BindingsImpl : BindingsBase // The ScoreColumnKind metadata value for all score columns. public readonly string ScoreColumnKind; - private readonly MetadataUtils.MetadataGetter _getScoreColumnKind; - private readonly MetadataUtils.MetadataGetter _getScoreValueKind; + private readonly MetadataUtils.MetadataGetter> _getScoreColumnKind; + private readonly MetadataUtils.MetadataGetter> _getScoreValueKind; private readonly IRow _predColMetadata; private BindingsImpl(ISchema input, ISchemaBoundRowMapper mapper, string suffix, string scoreColumnKind, @@ -251,17 +251,17 @@ protected override void GetMetadataCore(string kind, int iinfo, ref TVal base.GetMetadataCore(kind, iinfo, ref value); } - private void GetScoreColumnKind(int iinfo, ref DvText dst) + private void GetScoreColumnKind(int iinfo, ref ReadOnlyMemory dst) { Contracts.Assert(0 <= iinfo && iinfo < InfoCount); - dst = new DvText(ScoreColumnKind); + dst = ScoreColumnKind.AsMemory(); } - private void GetScoreValueKind(int iinfo, ref DvText dst) + private void GetScoreValueKind(int iinfo, ref ReadOnlyMemory dst) { // This should only get called for the derived column. 
Contracts.Assert(0 <= iinfo && iinfo < DerivedColumnCount); - dst = new DvText(MetadataUtils.Const.ScoreValueKind.PredictedLabel); + dst = MetadataUtils.Const.ScoreValueKind.PredictedLabel.AsMemory(); } public override Func GetActiveMapperColumns(bool[] active) diff --git a/src/Microsoft.ML.Data/Scorers/SchemaBindablePredictorWrapper.cs b/src/Microsoft.ML.Data/Scorers/SchemaBindablePredictorWrapper.cs index 2a017474ed..31b921a064 100644 --- a/src/Microsoft.ML.Data/Scorers/SchemaBindablePredictorWrapper.cs +++ b/src/Microsoft.ML.Data/Scorers/SchemaBindablePredictorWrapper.cs @@ -678,7 +678,7 @@ protected override Delegate GetPredictionGetter(IRow input, int colSrc) private sealed class Schema : ScoreMapperSchemaBase { private readonly string[] _slotNames; - private readonly MetadataUtils.MetadataGetter> _getSlotNames; + private readonly MetadataUtils.MetadataGetter>> _getSlotNames; public Schema(ColumnType scoreType, Double[] quantiles) : base(scoreType, MetadataUtils.Const.ScoreColumnKind.QuantileRegression) @@ -737,7 +737,7 @@ public override ColumnType GetColumnType(int col) return new VectorType(NumberType.Float, _slotNames.Length); } - private void GetSlotNames(int iinfo, ref VBuffer dst) + private void GetSlotNames(int iinfo, ref VBuffer> dst) { Contracts.Assert(iinfo == 0); Contracts.Assert(Utils.Size(_slotNames) > 0); @@ -745,10 +745,10 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) int size = Utils.Size(_slotNames); var values = dst.Values; if (Utils.Size(values) < size) - values = new DvText[size]; + values = new ReadOnlyMemory[size]; for (int i = 0; i < _slotNames.Length; i++) - values[i] = new DvText(_slotNames[i]); - dst = new VBuffer(size, values, dst.Indices); + values[i] = _slotNames[i].AsMemory(); + dst = new VBuffer>(size, values, dst.Indices); } } } diff --git a/src/Microsoft.ML.Data/Scorers/ScoreMapperSchema.cs b/src/Microsoft.ML.Data/Scorers/ScoreMapperSchema.cs index 0f115bb2f0..023a2f46f0 100644 --- 
a/src/Microsoft.ML.Data/Scorers/ScoreMapperSchema.cs +++ b/src/Microsoft.ML.Data/Scorers/ScoreMapperSchema.cs @@ -4,6 +4,7 @@ using Float = System.Single; using System.Collections.Generic; +using System; namespace Microsoft.ML.Runtime.Data { @@ -17,8 +18,8 @@ public abstract class ScoreMapperSchemaBase : ISchema { protected readonly ColumnType ScoreType; protected readonly string ScoreColumnKind; - protected readonly MetadataUtils.MetadataGetter ScoreValueKindGetter; - protected readonly MetadataUtils.MetadataGetter ScoreColumnKindGetter; + protected readonly MetadataUtils.MetadataGetter> ScoreValueKindGetter; + protected readonly MetadataUtils.MetadataGetter> ScoreColumnKindGetter; public ScoreMapperSchemaBase(ColumnType scoreType, string scoreColumnKind) { @@ -117,16 +118,16 @@ public virtual void GetMetadata(string kind, int col, ref TValue value) } } - protected virtual void GetScoreValueKind(int col, ref DvText dst) + protected virtual void GetScoreValueKind(int col, ref ReadOnlyMemory dst) { Contracts.Assert(0 <= col && col < ColumnCount); CheckColZero(col, "GetScoreValueKind"); - dst = new DvText(MetadataUtils.Const.ScoreValueKind.Score); + dst = MetadataUtils.Const.ScoreValueKind.Score.AsMemory(); } - private void GetScoreColumnKind(int col, ref DvText dst) + private void GetScoreColumnKind(int col, ref ReadOnlyMemory dst) { - dst = new DvText(ScoreColumnKind); + dst = ScoreColumnKind.AsMemory(); } } @@ -205,21 +206,21 @@ public override void GetMetadata(string kind, int col, ref TValue value) Contracts.CheckParam(0 <= col && col < ColumnCount, nameof(col)); if (col == base.ColumnCount && kind == MetadataUtils.Kinds.IsNormalized) - MetadataUtils.Marshal(IsNormalized, col, ref value); + MetadataUtils.Marshal(IsNormalized, col, ref value); else base.GetMetadata(kind, col, ref value); } - private void IsNormalized(int col, ref DvBool dst) + private void IsNormalized(int col, ref bool dst) { - dst = DvBool.True; + dst = true; } - protected override void 
GetScoreValueKind(int col, ref DvText dst) + protected override void GetScoreValueKind(int col, ref ReadOnlyMemory dst) { Contracts.Assert(0 <= col && col < ColumnCount); if (col == base.ColumnCount) - dst = new DvText(MetadataUtils.Const.ScoreValueKind.Probability); + dst = MetadataUtils.Const.ScoreValueKind.Probability.AsMemory(); else base.GetScoreValueKind(col, ref dst); } @@ -228,8 +229,8 @@ protected override void GetScoreValueKind(int col, ref DvText dst) public sealed class SequencePredictorSchema : ScoreMapperSchemaBase { private readonly VectorType _keyNamesType; - private readonly VBuffer _keyNames; - private readonly MetadataUtils.MetadataGetter> _getKeyNames; + private readonly VBuffer> _keyNames; + private readonly MetadataUtils.MetadataGetter>> _getKeyNames; private bool HasKeyNames { get { return _keyNamesType != null; } } @@ -241,7 +242,7 @@ public sealed class SequencePredictorSchema : ScoreMapperSchemaBase /// metadata. Note that we do not copy /// the input key names, but instead take a reference to it. 
/// - public SequencePredictorSchema(ColumnType type, ref VBuffer keyNames, string scoreColumnKind) + public SequencePredictorSchema(ColumnType type, ref VBuffer> keyNames, string scoreColumnKind) : base(type, scoreColumnKind) { if (keyNames.Length > 0) @@ -273,7 +274,7 @@ public override string GetColumnName(int col) return MetadataUtils.Const.ScoreValueKind.PredictedLabel; } - private void GetKeyNames(int col, ref VBuffer dst) + private void GetKeyNames(int col, ref VBuffer> dst) { Contracts.Assert(col == 0); Contracts.AssertValue(_keyNamesType); @@ -321,10 +322,10 @@ public override ColumnType GetMetadataTypeOrNull(string kind, int col) } } - protected override void GetScoreValueKind(int col, ref DvText dst) + protected override void GetScoreValueKind(int col, ref ReadOnlyMemory dst) { Contracts.Assert(col == 0); - dst = new DvText(MetadataUtils.Const.ScoreValueKind.PredictedLabel); + dst = MetadataUtils.Const.ScoreValueKind.PredictedLabel.AsMemory(); } } } diff --git a/src/Microsoft.ML.Data/StaticPipe/StaticSchemaShape.cs b/src/Microsoft.ML.Data/StaticPipe/StaticSchemaShape.cs index 39cc6cd316..161ea8f879 100644 --- a/src/Microsoft.ML.Data/StaticPipe/StaticSchemaShape.cs +++ b/src/Microsoft.ML.Data/StaticPipe/StaticSchemaShape.cs @@ -168,7 +168,7 @@ private static Type GetTypeOrNull(SchemaShape.Column col) pt == NumberType.I1 || pt == NumberType.I2 || pt == NumberType.I4 || pt == NumberType.I4 || pt == NumberType.U1 || pt == NumberType.U2 || pt == NumberType.U4 || pt == NumberType.U4 || pt == NumberType.R4 || pt == NumberType.R8 || pt == NumberType.UG || pt == BoolType.Instance || - pt == DateTimeType.Instance || pt == DateTimeZoneType.Instance || pt == TimeSpanType.Instance || + pt == DateTimeType.Instance || pt == DateTimeOffsetType.Instance || pt == TimeSpanType.Instance || pt == TextType.Instance)) { return (vecType ?? 
typeof(Scalar<>)).MakeGenericType(physType); @@ -254,9 +254,9 @@ private static Type GetTypeOrNull(IColumn col) var normtype = meta.Schema.GetColumnType(normcol); if (normtype == BoolType.Instance) { - DvBool val = default; - meta.GetGetter(normcol)(ref val); - if (val.IsTrue) + bool val = default; + meta.GetGetter(normcol)(ref val); + if (val) vecType = typeof(NormVector<>); } } @@ -312,7 +312,7 @@ private static Type GetTypeOrNull(IColumn col) pt == NumberType.I1 || pt == NumberType.I2 || pt == NumberType.I4 || pt == NumberType.I8 || pt == NumberType.U1 || pt == NumberType.U2 || pt == NumberType.U4 || pt == NumberType.U8 || pt == NumberType.R4 || pt == NumberType.R8 || pt == NumberType.UG || pt == BoolType.Instance || - pt == DateTimeType.Instance || pt == DateTimeZoneType.Instance || pt == TimeSpanType.Instance || + pt == DateTimeType.Instance || pt == DateTimeOffsetType.Instance || pt == TimeSpanType.Instance || pt == TextType.Instance)) { return (vecType ?? typeof(Scalar<>)).MakeGenericType(physType); diff --git a/src/Microsoft.ML.Data/Training/TrainerUtils.cs b/src/Microsoft.ML.Data/Training/TrainerUtils.cs index 33d3d1490d..b86daaace4 100644 --- a/src/Microsoft.ML.Data/Training/TrainerUtils.cs +++ b/src/Microsoft.ML.Data/Training/TrainerUtils.cs @@ -593,7 +593,7 @@ public void Signal(CursOpt opt) } /// - /// This supports Weight (Float), Group (ulong), and Id (DvInt8) columns. + /// This supports Weight (Float), Group (ulong), and Id (UInt128) columns. 
/// public class StandardScalarCursor : TrainingCursorBase { diff --git a/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs b/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs index e0f6c1dfdf..d827947192 100644 --- a/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs @@ -126,7 +126,6 @@ public sealed class TaggedArguments public sealed class ColumnInfo { public readonly string Output; - private readonly (string name, string alias)[] _inputs; public IReadOnlyList<(string name, string alias)> Inputs => _inputs.AsReadOnly(); @@ -231,7 +230,6 @@ public ConcatTransform(IHostEnvironment env, params ColumnInfo[] columns) Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(ConcatTransform)); Contracts.CheckValue(columns, nameof(columns)); - _columns = columns.ToArray(); } @@ -568,20 +566,20 @@ public RowMapperColumnInfo MakeColumnInfo() var metadata = new ColumnMetadataInfo(_columnInfo.Output); if (_isNormalized) - metadata.Add(MetadataUtils.Kinds.IsNormalized, new MetadataInfo(BoolType.Instance, GetIsNormalized)); + metadata.Add(MetadataUtils.Kinds.IsNormalized, new MetadataInfo(BoolType.Instance, GetIsNormalized)); if (_hasSlotNames) - metadata.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>(_slotNamesType, GetSlotNames)); + metadata.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>>(_slotNamesType, GetSlotNames)); if (_hasCategoricals) - metadata.Add(MetadataUtils.Kinds.CategoricalSlotRanges, new MetadataInfo>(_categoricalRangeType, GetCategoricalSlotRanges)); + metadata.Add(MetadataUtils.Kinds.CategoricalSlotRanges, new MetadataInfo>(_categoricalRangeType, GetCategoricalSlotRanges)); return new RowMapperColumnInfo(_columnInfo.Output, OutputType, metadata); } - private void GetIsNormalized(int col, ref DvBool value) => value = _isNormalized; + private void GetIsNormalized(int col, ref bool value) => value = _isNormalized; - private void GetCategoricalSlotRanges(int iiinfo, ref 
VBuffer dst) + private void GetCategoricalSlotRanges(int iiinfo, ref VBuffer dst) { - List allValues = new List(); + List allValues = new List(); int slotCount = 0; for (int i = 0; i < SrcIndices.Length; i++) { @@ -602,10 +600,10 @@ private void GetCategoricalSlotRanges(int iiinfo, ref VBuffer dst) Contracts.Assert(allValues.Count > 0); - dst = new VBuffer(allValues.Count, allValues.ToArray()); + dst = new VBuffer(allValues.Count, allValues.ToArray()); } - private void GetSlotNames(int iinfo, ref VBuffer dst) + private void GetSlotNames(int iinfo, ref VBuffer> dst) { Contracts.Assert(!_isIdentity); Contracts.Assert(OutputType.VectorSize > 0); @@ -613,11 +611,11 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) Contracts.AssertValue(_slotNamesType); Contracts.Assert(_slotNamesType.VectorSize == OutputType.VectorSize); - var bldr = BufferBuilder.CreateDefault(); + var bldr = BufferBuilder>.CreateDefault(); bldr.Reset(_slotNamesType.VectorSize, dense: false); var sb = new StringBuilder(); - var names = default(VBuffer); + var names = default(VBuffer>); int slot = 0; for (int i = 0; i < _srcTypes.Length; i++) { @@ -628,7 +626,7 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) var nameSrc = _columnInfo.Inputs[i].alias ?? 
colName; if (!typeSrc.IsVector) { - bldr.AddFeature(slot++, new DvText(nameSrc)); + bldr.AddFeature(slot++, nameSrc.AsMemory()); continue; } @@ -643,11 +641,11 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) int len = sb.Length; foreach (var kvp in names.Items()) { - if (!kvp.Value.HasChars) + if (kvp.Value.IsEmpty) continue; sb.Length = len; - kvp.Value.AddToStringBuilder(sb); - bldr.AddFeature(slot + kvp.Key, new DvText(sb.ToString())); + sb.AppendMemory(kvp.Value); + bldr.AddFeature(slot + kvp.Key, sb.ToString().AsMemory()); } } slot += _srcTypes[i].VectorSize; diff --git a/src/Microsoft.ML.Data/Transforms/ConvertTransform.cs b/src/Microsoft.ML.Data/Transforms/ConvertTransform.cs index 52005c7558..eec59c8a8f 100644 --- a/src/Microsoft.ML.Data/Transforms/ConvertTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ConvertTransform.cs @@ -246,7 +246,7 @@ private void SetMetadata() using (var bldr = md.BuildMetadata(iinfo, Source.Schema, info.Source, PassThrough)) { if (info.TypeSrc.IsBool && _exes[iinfo].TypeDst.ItemType.IsNumber) - bldr.AddPrimitive(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, DvBool.True); + bldr.AddPrimitive(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, true); } } md.Seal(); diff --git a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs index 230cfbe680..9945ee6cc3 100644 --- a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs @@ -384,7 +384,7 @@ private void ComputeType(ISchema input, int[] slotsMin, int[] slotsMax, int iinf if (hasSlotNames && dstLength > 0) { // Add slot name metadata. 
- bldr.AddGetter>(MetadataUtils.Kinds.SlotNames, + bldr.AddGetter>>(MetadataUtils.Kinds.SlotNames, new VectorType(TextType.Instance, dstLength), GetSlotNames); } } @@ -393,14 +393,14 @@ private void ComputeType(ISchema input, int[] slotsMin, int[] slotsMax, int iinf { if (MetadataUtils.TryGetCategoricalFeatureIndices(Source.Schema, Infos[iinfo].Source, out categoricalRanges)) { - VBuffer dst = default(VBuffer); + VBuffer dst = default(VBuffer); GetCategoricalSlotRangesCore(iinfo, slotDropper.SlotsMin, slotDropper.SlotsMax, categoricalRanges, ref dst); // REVIEW: cache dst as opposed to caculating it again. if (dst.Length > 0) { Contracts.Assert(dst.Length % 2 == 0); - bldr.AddGetter>(MetadataUtils.Kinds.CategoricalSlotRanges, + bldr.AddGetter>(MetadataUtils.Kinds.CategoricalSlotRanges, MetadataUtils.GetCategoricalType(dst.Length / 2), GetCategoricalSlotRanges); } } @@ -433,17 +433,17 @@ protected override ColumnType GetColumnTypeCore(int iinfo) return _exes[iinfo].TypeDst; } - private void GetSlotNames(int iinfo, ref VBuffer dst) + private void GetSlotNames(int iinfo, ref VBuffer> dst) { Host.Assert(0 <= iinfo && iinfo < Infos.Length); - var names = default(VBuffer); + var names = default(VBuffer>); Source.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, Infos[iinfo].Source, ref names); var infoEx = _exes[iinfo]; infoEx.SlotDropper.DropSlots(ref names, ref dst); } - private void GetCategoricalSlotRanges(int iinfo, ref VBuffer dst) + private void GetCategoricalSlotRanges(int iinfo, ref VBuffer dst) { if (_exes[iinfo].CategoricalRanges != null) { @@ -452,7 +452,7 @@ private void GetCategoricalSlotRanges(int iinfo, ref VBuffer dst) } } - private void GetCategoricalSlotRangesCore(int iinfo, int[] slotsMin, int[] slotsMax, int[] catRanges, ref VBuffer dst) + private void GetCategoricalSlotRangesCore(int iinfo, int[] slotsMin, int[] slotsMax, int[] catRanges, ref VBuffer dst) { Host.Assert(0 <= iinfo && iinfo < Infos.Length); Host.Assert(slotsMax != null && slotsMin != 
null); @@ -467,9 +467,9 @@ private void GetCategoricalSlotRangesCore(int iinfo, int[] slotsMin, int[] slots int previousDropSlotsIndex = 0; int droppedSlotsCount = 0; bool combine = false; - DvInt4 min = -1; - DvInt4 max = -1; - List newCategoricalSlotRanges = new List(); + int min = -1; + int max = -1; + List newCategoricalSlotRanges = new List(); // Six possible ways a drop slot range interacts with categorical slots range. // @@ -498,7 +498,7 @@ private void GetCategoricalSlotRangesCore(int iinfo, int[] slotsMin, int[] slots } else { - Contracts.Assert(min.RawValue == -1 && max.RawValue == -1); + Contracts.Assert(min == -1 && max == -1); min = ranges[rangesIndex] - droppedSlotsCount; max = ranges[rangesIndex + 1] - droppedSlotsCount; } @@ -515,14 +515,14 @@ private void GetCategoricalSlotRangesCore(int iinfo, int[] slotsMin, int[] slots rangesIndex += 2; if (combine) { - Contracts.Assert(min.RawValue >= 0 && min.RawValue <= max.RawValue); + Contracts.Assert(min >= 0 && min <= max); newCategoricalSlotRanges.Add(min); newCategoricalSlotRanges.Add(max); min = max = -1; combine = false; } - Contracts.Assert(min.RawValue == -1 && max.RawValue == -1); + Contracts.Assert(min == -1 && max == -1); } else if (slotsMin[dropSlotsIndex] > ranges[rangesIndex] && @@ -535,7 +535,7 @@ private void GetCategoricalSlotRangesCore(int iinfo, int[] slotsMin, int[] slots } else { - Contracts.Assert(min.RawValue == -1 && max.RawValue == -1); + Contracts.Assert(min == -1 && max == -1); min = ranges[rangesIndex] - droppedSlotsCount; max = slotsMin[dropSlotsIndex] - 1 - droppedSlotsCount; @@ -576,28 +576,28 @@ private void GetCategoricalSlotRangesCore(int iinfo, int[] slotsMin, int[] slots min = max = -1; } - Contracts.Assert(min.RawValue == -1 && max.RawValue == -1); + Contracts.Assert(min == -1 && max == -1); for (int i = rangesIndex; i < ranges.Length; i++) newCategoricalSlotRanges.Add(ranges[i] - droppedSlotsCount); Contracts.Assert(newCategoricalSlotRanges.Count % 2 == 0); - 
Contracts.Assert(newCategoricalSlotRanges.TrueForAll(x => x.RawValue >= 0)); + Contracts.Assert(newCategoricalSlotRanges.TrueForAll(x => x >= 0)); Contracts.Assert(0 <= droppedSlotsCount && droppedSlotsCount <= slotsMax[slotsMax.Length - 1] + 1); if (newCategoricalSlotRanges.Count > 0) - dst = new VBuffer(newCategoricalSlotRanges.Count, newCategoricalSlotRanges.ToArray()); + dst = new VBuffer(newCategoricalSlotRanges.Count, newCategoricalSlotRanges.ToArray()); } private void CombineRanges( - DvInt4 minRange1, DvInt4 maxRange1, DvInt4 minRange2, DvInt4 maxRange2, - out DvInt4 newRangeMin, out DvInt4 newRangeMax) + int minRange1, int maxRange1, int minRange2, int maxRange2, + out int newRangeMin, out int newRangeMax) { - Contracts.Assert(minRange2.RawValue >= 0 && maxRange2.RawValue >= 0); - Contracts.Assert(minRange2.RawValue <= maxRange2.RawValue); - Contracts.Assert(minRange1.RawValue >= 0 && maxRange1.RawValue >= 0); - Contracts.Assert(minRange1.RawValue <= maxRange1.RawValue); - Contracts.Assert(maxRange1.RawValue + 1 == minRange2.RawValue); + Contracts.Assert(minRange2 >= 0 && maxRange2 >= 0); + Contracts.Assert(minRange2 <= maxRange2); + Contracts.Assert(minRange1 >= 0 && maxRange1 >= 0); + Contracts.Assert(minRange1 <= maxRange1); + Contracts.Assert(maxRange1 + 1 == minRange2); newRangeMin = minRange1; newRangeMax = maxRange2; diff --git a/src/Microsoft.ML.Data/Transforms/GenerateNumberTransform.cs b/src/Microsoft.ML.Data/Transforms/GenerateNumberTransform.cs index cacd681141..c141bb66c1 100644 --- a/src/Microsoft.ML.Data/Transforms/GenerateNumberTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/GenerateNumberTransform.cs @@ -213,17 +213,17 @@ protected override void GetMetadataCore(string kind, int iinfo, ref TVal Contracts.Assert(0 <= iinfo & iinfo < InfoCount); if (kind == MetadataUtils.Kinds.IsNormalized && !UseCounter[iinfo]) { - MetadataUtils.Marshal(IsNormalized, iinfo, ref value); + MetadataUtils.Marshal(IsNormalized, iinfo, ref value); return; } 
base.GetMetadataCore(kind, iinfo, ref value); } - private void IsNormalized(int iinfo, ref DvBool dst) + private void IsNormalized(int iinfo, ref bool dst) { Contracts.Assert(0 <= iinfo & iinfo < InfoCount); - dst = DvBool.True; + dst = true; } public Func GetDependencies(Func predicate) @@ -430,9 +430,9 @@ public ValueGetter GetGetter(int col) return fn; } - private ValueGetter MakeGetter() + private ValueGetter MakeGetter() { - return (ref DvInt8 value) => + return (ref long value) => { Ch.Check(IsGood); value = Input.Position; diff --git a/src/Microsoft.ML.Data/Transforms/HashTransform.cs b/src/Microsoft.ML.Data/Transforms/HashTransform.cs index e0aa599e49..8eec4e1581 100644 --- a/src/Microsoft.ML.Data/Transforms/HashTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/HashTransform.cs @@ -191,7 +191,7 @@ private static VersionInfo GetVersionInfo() private readonly ColInfoEx[] _exes; private readonly ColumnType[] _types; - private readonly VBuffer[] _keyValues; + private readonly VBuffer>[] _keyValues; private readonly ColumnType[] _kvTypes; public static HashTransform Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input) @@ -311,7 +311,7 @@ public HashTransform(IHostEnvironment env, Arguments args, IDataView input) for (int i = 0; i < helpers.Length; ++i) helpers[i].Process(); } - _keyValues = new VBuffer[_exes.Length]; + _keyValues = new VBuffer>[_exes.Length]; _kvTypes = new ColumnType[_exes.Length]; for (int i = 0; i < helpers.Length; ++i) { @@ -390,13 +390,13 @@ private void SetMetadata() MetadataUtils.Kinds.SlotNames)) { if (_kvTypes != null && _kvTypes[iinfo] != null) - bldr.AddGetter>(MetadataUtils.Kinds.KeyValues, _kvTypes[iinfo], GetTerms); + bldr.AddGetter>>(MetadataUtils.Kinds.KeyValues, _kvTypes[iinfo], GetTerms); } } md.Seal(); } - private void GetTerms(int iinfo, ref VBuffer dst) + private void GetTerms(int iinfo, ref VBuffer> dst) { Host.Assert(0 <= iinfo && iinfo < Infos.Length); Host.Assert(Utils.Size(_keyValues) == Infos.Length); 
@@ -433,7 +433,7 @@ private ValueGetter ComposeGetterOne(IRow input, int iinfo) switch (colType.RawKind) { case DataKind.Text: - return ComposeGetterOneCore(GetSrcGetter(input, iinfo), seed, mask); + return ComposeGetterOneCore(GetSrcGetter>(input, iinfo), seed, mask); case DataKind.U1: return ComposeGetterOneCore(GetSrcGetter(input, iinfo), seed, mask); case DataKind.U2: @@ -450,9 +450,9 @@ private ValueGetter ComposeGetterOne(IRow input, int iinfo) } } - private ValueGetter ComposeGetterOneCore(ValueGetter getSrc, uint seed, uint mask) + private ValueGetter ComposeGetterOneCore(ValueGetter> getSrc, uint seed, uint mask) { - DvText src = default(DvText); + ReadOnlyMemory src = default; return (ref uint dst) => { @@ -546,7 +546,7 @@ private ValueGetter> ComposeGetterVec(IRow input, int iinfo) switch (colType.ItemType.RawKind) { case DataKind.Text: - return ComposeGetterVecCore(input, iinfo, HashUnord, HashDense, HashSparse); + return ComposeGetterVecCore>(input, iinfo, HashUnord, HashDense, HashSparse); case DataKind.U1: return ComposeGetterVecCore(input, iinfo, HashUnord, HashDense, HashSparse); case DataKind.U2: @@ -670,28 +670,28 @@ private ValueGetter> ComposeGetterVecCoreFloat(IRow input, int #region Core Hash functions, with and without index [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static uint HashCore(uint seed, ref DvText value, uint mask) + private static uint HashCore(uint seed, ref ReadOnlyMemory value, uint mask) { Contracts.Assert(Utils.IsPowerOfTwo(mask + 1)); - if (!value.HasChars) + if (value.IsEmpty) return 0; - return (value.Trim().Hash(seed) & mask) + 1; + return (Hashing.MurmurHash(seed, value.Span.Trim(' ')) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static uint HashCore(uint seed, ref DvText value, int i, uint mask) + private static uint HashCore(uint seed, ref ReadOnlyMemory value, int i, uint mask) { Contracts.Assert(Utils.IsPowerOfTwo(mask + 1)); - if (!value.HasChars) + if 
(value.IsEmpty) return 0; - return (value.Trim().Hash(Hashing.MurmurRound(seed, (uint)i)) & mask) + 1; + return (Hashing.MurmurHash(Hashing.MurmurRound(seed, (uint)i), value.Span.Trim(' ')) & mask) + 1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static uint HashCore(uint seed, ref float value, uint mask) { Contracts.Assert(Utils.IsPowerOfTwo(mask + 1)); - if (value.IsNA()) + if (float.IsNaN(value)) return 0; // (value == 0 ? 0 : value) takes care of negative 0, its equal to positive 0 according to the IEEE 754 standard return (Hashing.MixHash(Hashing.MurmurRound(seed, FloatUtils.GetBits(value == 0 ? 0 : value))) & mask) + 1; @@ -701,7 +701,7 @@ private static uint HashCore(uint seed, ref float value, uint mask) private static uint HashCore(uint seed, ref float value, int i, uint mask) { Contracts.Assert(Utils.IsPowerOfTwo(mask + 1)); - if (value.IsNA()) + if (float.IsNaN(value)) return 0; return (Hashing.MixHash(Hashing.MurmurRound(Hashing.MurmurRound(seed, (uint)i), FloatUtils.GetBits(value == 0 ? 0: value))) & mask) + 1; @@ -711,7 +711,7 @@ private static uint HashCore(uint seed, ref float value, int i, uint mask) private static uint HashCore(uint seed, ref double value, uint mask) { Contracts.Assert(Utils.IsPowerOfTwo(mask + 1)); - if (value.IsNA()) + if (double.IsNaN(value)) return 0; ulong v = FloatUtils.GetBits(value == 0 ? 0 : value); @@ -727,7 +727,7 @@ private static uint HashCore(uint seed, ref double value, int i, uint mask) { // If the high word is zero, this should produce the same value as the uint version. Contracts.Assert(Utils.IsPowerOfTwo(mask + 1)); - if (value.IsNA()) + if (double.IsNaN(value)) return 0; ulong v = FloatUtils.GetBits(value == 0 ? 
0 : value); @@ -790,7 +790,7 @@ private static uint HashCore(uint seed, ulong value, int i, uint mask) #endregion Core Hash functions, with and without index #region Unordered Loop: ignore indices - private static void HashUnord(int count, int[] indices, DvText[] src, uint[] dst, uint seed, uint mask) + private static void HashUnord(int count, int[] indices, ReadOnlyMemory[] src, uint[] dst, uint seed, uint mask) { AssertValid(count, src, dst); @@ -848,7 +848,7 @@ private static void HashUnord(int count, int[] indices, double[] src, uint[] dst #endregion Unordered Loop: ignore indices #region Dense Loop: ignore indices - private static void HashDense(int count, int[] indices, DvText[] src, uint[] dst, uint seed, uint mask) + private static void HashDense(int count, int[] indices, ReadOnlyMemory[] src, uint[] dst, uint seed, uint mask) { AssertValid(count, src, dst); @@ -905,7 +905,7 @@ private static void HashDense(int count, int[] indices, double[] src, uint[] dst #endregion Dense Loop: ignore indices #region Sparse Loop: use indices - private static void HashSparse(int count, int[] indices, DvText[] src, uint[] dst, uint seed, uint mask) + private static void HashSparse(int count, int[] indices, ReadOnlyMemory[] src, uint[] dst, uint seed, uint mask) { AssertValid(count, src, dst); Contracts.Assert(count <= Utils.Size(indices)); @@ -1051,9 +1051,9 @@ public static InvertHashHelper Create(IRow row, ColInfo info, ColInfoEx ex, int /// public abstract void Process(); - public abstract VBuffer GetKeyValuesMetadata(); + public abstract VBuffer> GetKeyValuesMetadata(); - private sealed class TextEqualityComparer : IEqualityComparer + private sealed class TextEqualityComparer : IEqualityComparer> { // REVIEW: Is this sufficiently useful? Should we be using term map, instead? 
private readonly uint _seed; @@ -1063,16 +1063,13 @@ public TextEqualityComparer(uint seed) _seed = seed; } - public bool Equals(DvText x, DvText y) - { - return x.Equals(y); - } + public bool Equals(ReadOnlyMemory x, ReadOnlyMemory y) => x.Span.SequenceEqual(y.Span); - public int GetHashCode(DvText obj) + public int GetHashCode(ReadOnlyMemory obj) { - if (!obj.HasChars) + if (obj.IsEmpty) return 0; - return (int)obj.Trim().Hash(_seed) + 1; + return (int)Hashing.MurmurHash(_seed, obj.Span.Trim(' ')) + 1; } } @@ -1099,7 +1096,7 @@ public int GetHashCode(KeyValuePair obj) private IEqualityComparer GetSimpleComparer() { Contracts.Assert(_info.TypeSrc.ItemType.RawType == typeof(T)); - if (typeof(T) == typeof(DvText)) + if (typeof(T) == typeof(ReadOnlyMemory)) { // We are hashing twice, once to assign to the slot, and then again, // to build a set of encountered elements. Obviously we cannot use the @@ -1140,7 +1137,7 @@ protected virtual IEqualityComparer GetComparer() return GetSimpleComparer(); } - public override VBuffer GetKeyValuesMetadata() + public override VBuffer> GetKeyValuesMetadata() { return Collector.GetMetadata(); } diff --git a/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs b/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs index d615b96894..db6ff54298 100644 --- a/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs +++ b/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs @@ -49,9 +49,9 @@ public static ValueMapper GetSimpleMapper(ISchema schema, i { // REVIEW: Non-textual KeyValues are certainly possible. Should we handle them? // Get the key names. - VBuffer keyValues = default(VBuffer); + VBuffer> keyValues = default; schema.GetMetadata(MetadataUtils.Kinds.KeyValues, col, ref keyValues); - DvText value = default(DvText); + ReadOnlyMemory value = default; // REVIEW: We could optimize for identity, but it's probably not worthwhile. 
var keyMapper = conv.GetStandardConversion(type, NumberType.U4, out identity); @@ -64,7 +64,7 @@ public static ValueMapper GetSimpleMapper(ISchema schema, i if (intermediate == 0) return; keyValues.GetItemOrDefault((int)(intermediate - 1), ref value); - value.AddToStringBuilder(dst); + dst.AppendMemory(value); }; } @@ -181,7 +181,7 @@ public InvertHashCollector(int slots, int maxCount, ValueMapper dst = src); } - private DvText Textify(ref StringBuilder sb, ref StringBuilder temp, ref char[] cbuffer, ref Pair[] buffer, HashSet pairs) + private ReadOnlyMemory Textify(ref StringBuilder sb, ref StringBuilder temp, ref char[] cbuffer, ref Pair[] buffer, HashSet pairs) { Contracts.AssertValueOrNull(sb); Contracts.AssertValueOrNull(temp); @@ -200,7 +200,7 @@ private DvText Textify(ref StringBuilder sb, ref StringBuilder temp, ref char[] { var value = buffer[0].Value; _stringifyMapper(ref value, ref temp); - return Utils.Size(temp) > 0 ? new DvText(temp.ToString()) : DvText.Empty; + return Utils.Size(temp) > 0 ? 
temp.ToString().AsMemory() : String.Empty.AsMemory(); } Array.Sort(buffer, 0, count, Comparer.Create((x, y) => x.Order - y.Order)); @@ -219,12 +219,12 @@ private DvText Textify(ref StringBuilder sb, ref StringBuilder temp, ref char[] InvertHashUtils.AppendToEnd(temp, sb, ref cbuffer); } sb.Append('}'); - var retval = new DvText(sb.ToString()); + var retval = sb.ToString().AsMemory(); sb.Clear(); return retval; } - public VBuffer GetMetadata() + public VBuffer> GetMetadata() { int count = _slotToValueSet.Count; Contracts.Assert(count <= _slots); @@ -238,7 +238,7 @@ public VBuffer GetMetadata() { // Sparse var indices = new int[count]; - var values = new DvText[count]; + var values = new ReadOnlyMemory[count]; int i = 0; foreach (var p in _slotToValueSet) { @@ -248,18 +248,18 @@ public VBuffer GetMetadata() } Contracts.Assert(i == count); Array.Sort(indices, values); - return new VBuffer((int)_slots, count, values, indices); + return new VBuffer>((int)_slots, count, values, indices); } else { // Dense - var values = new DvText[_slots]; + var values = new ReadOnlyMemory[_slots]; foreach (var p in _slotToValueSet) { Contracts.Assert(0 <= p.Key && p.Key < _slots); values[p.Key] = Textify(ref sb, ref temp, ref cbuffer, ref pairs, p.Value); } - return new VBuffer(values.Length, values); + return new VBuffer>(values.Length, values); } } @@ -315,7 +315,7 @@ public void Add(uint hash, T key) } /// - /// Simple utility class for saving a of + /// Simple utility class for saving a of ReadOnlyMemory /// as a model, both in a binary and more easily human readable form. 
/// public static class TextModelHelper @@ -332,14 +332,14 @@ private static VersionInfo GetVersionInfo() loaderSignature: LoaderSignature); } - private static void Load(IChannel ch, ModelLoadContext ctx, CodecFactory factory, ref VBuffer values) + private static void Load(IChannel ch, ModelLoadContext ctx, CodecFactory factory, ref VBuffer> values) { Contracts.AssertValue(ch); ch.CheckValue(ctx, nameof(ctx)); ctx.CheckAtModel(GetVersionInfo()); // *** Binary format *** - // Codec parameterization: A codec parameterization that should be a VBuffer codec + // Codec parameterization: A codec parameterization that should be a ReadOnlyMemory codec // int: n, the number of bytes used to write the values // byte[n]: As encoded using the codec @@ -355,7 +355,7 @@ private static void Load(IChannel ch, ModelLoadContext ctx, CodecFactory factory ch.AssertValue(codec); ch.CheckDecode(codec.Type.IsVector); ch.CheckDecode(codec.Type.ItemType.IsText); - var textCodec = (IValueCodec>)codec; + var textCodec = (IValueCodec>>)codec; var bufferLen = ctx.Reader.ReadInt32(); ch.CheckDecode(bufferLen >= 0); @@ -364,14 +364,14 @@ private static void Load(IChannel ch, ModelLoadContext ctx, CodecFactory factory using (var reader = textCodec.OpenReader(stream, 1)) { reader.MoveNext(); - values = default(VBuffer); + values = default(VBuffer>); reader.Get(ref values); } ch.CheckDecode(stream.ReadByte() == -1); } } - private static void Save(IChannel ch, ModelSaveContext ctx, CodecFactory factory, ref VBuffer values) + private static void Save(IChannel ch, ModelSaveContext ctx, CodecFactory factory, ref VBuffer> values) { Contracts.AssertValue(ch); ch.CheckValue(ctx, nameof(ctx)); @@ -379,7 +379,7 @@ private static void Save(IChannel ch, ModelSaveContext ctx, CodecFactory factory ctx.SetVersionInfo(GetVersionInfo()); // *** Binary format *** - // Codec parameterization: A codec parameterization that should be a VBuffer codec + // Codec parameterization: A codec parameterization that should be 
a ReadOnlyMemory codec // int: n, the number of bytes used to write the values // byte[n]: As encoded using the codec @@ -389,8 +389,8 @@ private static void Save(IChannel ch, ModelSaveContext ctx, CodecFactory factory ch.Assert(result); ch.Assert(codec.Type.IsVector); ch.Assert(codec.Type.VectorSize == 0); - ch.Assert(codec.Type.ItemType.RawType == typeof(DvText)); - IValueCodec> textCodec = (IValueCodec>)codec; + ch.Assert(codec.Type.ItemType.RawType == typeof(ReadOnlyMemory)); + IValueCodec>> textCodec = (IValueCodec>>)codec; factory.WriteCodec(ctx.Writer.BaseStream, codec); using (var mem = new MemoryStream()) @@ -420,22 +420,23 @@ private static void Save(IChannel ch, ModelSaveContext ctx, CodecFactory factory writer.Write("{0}\t", pair.Key); // REVIEW: What about escaping this, *especially* for linebreaks? // Do C# and .NET really have no equivalent to Python's "repr"? :( - if (!text.HasChars) + if (text.IsEmpty) { writer.WriteLine(); continue; } Utils.EnsureSize(ref buffer, text.Length); - int ichMin; - int ichLim; - string str = text.GetRawUnderlyingBufferInfo(out ichMin, out ichLim); - str.CopyTo(ichMin, buffer, 0, text.Length); + + var span = text.Span; + for (int i = 0; i < text.Length; i++) + buffer[i] = span[i]; + writer.WriteLine(buffer, 0, text.Length); } }); } - public static void LoadAll(IHost host, ModelLoadContext ctx, int infoLim, out VBuffer[] keyValues, out ColumnType[] kvTypes) + public static void LoadAll(IHost host, ModelLoadContext ctx, int infoLim, out VBuffer>[] keyValues, out ColumnType[] kvTypes) { Contracts.AssertValue(host); host.AssertValue(ctx); @@ -443,7 +444,7 @@ public static void LoadAll(IHost host, ModelLoadContext ctx, int infoLim, out VB using (var ch = host.Start("LoadTextValues")) { // Try to find the key names. 
- VBuffer[] keyValuesLocal = null; + VBuffer>[] keyValuesLocal = null; ColumnType[] kvTypesLocal = null; CodecFactory factory = null; const string dirFormat = "Vocabulary_{0:000}"; @@ -455,7 +456,7 @@ public static void LoadAll(IHost host, ModelLoadContext ctx, int infoLim, out VB // Load the lazily initialized structures, if needed. if (keyValuesLocal == null) { - keyValuesLocal = new VBuffer[infoLim]; + keyValuesLocal = new VBuffer>[infoLim]; kvTypesLocal = new ColumnType[infoLim]; factory = new CodecFactory(host); } @@ -470,7 +471,7 @@ public static void LoadAll(IHost host, ModelLoadContext ctx, int infoLim, out VB } } - public static void SaveAll(IHost host, ModelSaveContext ctx, int infoLim, VBuffer[] keyValues) + public static void SaveAll(IHost host, ModelSaveContext ctx, int infoLim, VBuffer>[] keyValues) { Contracts.AssertValue(host); host.AssertValue(ctx); diff --git a/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs b/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs index 41abfffcf1..bda8729d4a 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs @@ -314,11 +314,11 @@ private void AddMetadata(int i, ColumnMetadataInfo colMetaInfo) { if (typeNames != null) { - MetadataUtils.MetadataGetter> getter = (int col, ref VBuffer dst) => + MetadataUtils.MetadataGetter>> getter = (int col, ref VBuffer> dst) => { InputSchema.GetMetadata(MetadataUtils.Kinds.KeyValues, srcCol, ref dst); }; - var info = new MetadataInfo>(typeNames, getter); + var info = new MetadataInfo>>(typeNames, getter); colMetaInfo.Add(MetadataUtils.Kinds.SlotNames, info); } } @@ -326,38 +326,38 @@ private void AddMetadata(int i, ColumnMetadataInfo colMetaInfo) { if (typeNames != null && _types[i].IsKnownSizeVector) { - MetadataUtils.MetadataGetter> getter = (int col, ref VBuffer dst) => + MetadataUtils.MetadataGetter>> getter = (int col, ref VBuffer> dst) => { GetSlotNames(i, ref dst); }; - var info = 
new MetadataInfo>(new VectorType(TextType.Instance, _types[i]), getter); + var info = new MetadataInfo>>(new VectorType(TextType.Instance, _types[i]), getter); colMetaInfo.Add(MetadataUtils.Kinds.SlotNames, info); } } if (!_parent._columns[i].Bag && srcType.ValueCount > 0) { - MetadataUtils.MetadataGetter> getter = (int col, ref VBuffer dst) => + MetadataUtils.MetadataGetter> getter = (int col, ref VBuffer dst) => { GetCategoricalSlotRanges(i, ref dst); }; - var info = new MetadataInfo>(MetadataUtils.GetCategoricalType(_infos[i].TypeSrc.ValueCount), getter); + var info = new MetadataInfo>(MetadataUtils.GetCategoricalType(_infos[i].TypeSrc.ValueCount), getter); colMetaInfo.Add(MetadataUtils.Kinds.CategoricalSlotRanges, info); } if (!_parent._columns[i].Bag || srcType.ValueCount == 1) { - MetadataUtils.MetadataGetter getter = (int col, ref DvBool dst) => + MetadataUtils.MetadataGetter getter = (int col, ref bool dst) => { dst = true; }; - var info = new MetadataInfo(BoolType.Instance, getter); + var info = new MetadataInfo(BoolType.Instance, getter); colMetaInfo.Add(MetadataUtils.Kinds.IsNormalized, info); } } // Combines source key names and slot names to produce final slot names. - private void GetSlotNames(int iinfo, ref VBuffer dst) + private void GetSlotNames(int iinfo, ref VBuffer> dst) { Host.Assert(0 <= iinfo && iinfo < _infos.Length); Host.Assert(_types[iinfo].IsKnownSizeVector); @@ -368,7 +368,7 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) Host.Assert(typeSrc.VectorSize > 1); // Get the source slot names, defaulting to empty text. 
- var namesSlotSrc = default(VBuffer); + var namesSlotSrc = default(VBuffer>); InputSchema.TryGetColumnIndex(_infos[iinfo].Source, out int srcCol); Host.Assert(srcCol >= 0); var typeSlotSrc = InputSchema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, srcCol); @@ -378,22 +378,22 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) Host.Check(namesSlotSrc.Length == typeSrc.VectorSize); } else - namesSlotSrc = VBufferUtils.CreateEmpty(typeSrc.VectorSize); + namesSlotSrc = VBufferUtils.CreateEmpty>(typeSrc.VectorSize); int keyCount = typeSrc.ItemType.ItemType.KeyCount; int slotLim = _types[iinfo].VectorSize; Host.Assert(slotLim == (long)typeSrc.VectorSize * keyCount); // Get the source key names, in an array (since we will use them multiple times). - var namesKeySrc = default(VBuffer); + var namesKeySrc = default(VBuffer>); InputSchema.GetMetadata(MetadataUtils.Kinds.KeyValues, srcCol, ref namesKeySrc); Host.Check(namesKeySrc.Length == keyCount); - var keys = new DvText[keyCount]; + var keys = new ReadOnlyMemory[keyCount]; namesKeySrc.CopyTo(keys); var values = dst.Values; if (Utils.Size(values) < slotLim) - values = new DvText[slotLim]; + values = new ReadOnlyMemory[slotLim]; var sb = new StringBuilder(); int slot = 0; @@ -401,8 +401,8 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) { Contracts.Assert(slot == (long)kvpSlot.Key * keyCount); sb.Clear(); - if (kvpSlot.Value.HasChars) - kvpSlot.Value.AddToStringBuilder(sb); + if (!kvpSlot.Value.IsEmpty) + sb.AppendMemory(kvpSlot.Value); else sb.Append('[').Append(kvpSlot.Key).Append(']'); sb.Append('.'); @@ -411,16 +411,16 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) foreach (var key in keys) { sb.Length = len; - key.AddToStringBuilder(sb); - values[slot++] = new DvText(sb.ToString()); + sb.AppendMemory(key); + values[slot++] = sb.ToString().AsMemory(); } } Host.Assert(slot == slotLim); - dst = new VBuffer(slotLim, values, dst.Indices); + dst = new VBuffer>(slotLim, values, dst.Indices); } - 
private void GetCategoricalSlotRanges(int iinfo, ref VBuffer dst) + private void GetCategoricalSlotRanges(int iinfo, ref VBuffer dst) { Host.Assert(0 <= iinfo && iinfo < _infos.Length); @@ -428,7 +428,7 @@ private void GetCategoricalSlotRanges(int iinfo, ref VBuffer dst) Host.Assert(info.TypeSrc.ValueCount > 0); - DvInt4[] ranges = new DvInt4[info.TypeSrc.ValueCount * 2]; + int[] ranges = new int[info.TypeSrc.ValueCount * 2]; int size = info.TypeSrc.ItemType.KeyCount; ranges[0] = 0; @@ -439,7 +439,7 @@ private void GetCategoricalSlotRanges(int iinfo, ref VBuffer dst) ranges[i + 1] = ranges[i] + size - 1; } - dst = new VBuffer(ranges.Length, ranges); + dst = new VBuffer(ranges.Length, ranges); } protected override Delegate MakeGetter(IRow input, int iinfo, out Action disposer) diff --git a/src/Microsoft.ML.Data/Transforms/LabelIndicatorTransform.cs b/src/Microsoft.ML.Data/Transforms/LabelIndicatorTransform.cs index a7672b5a1c..81882ac749 100644 --- a/src/Microsoft.ML.Data/Transforms/LabelIndicatorTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/LabelIndicatorTransform.cs @@ -174,7 +174,7 @@ protected override Delegate GetGetterCore(IChannel ch, IRow input, return GetGetter(ch, input, iinfo); } - private ValueGetter GetGetter(IChannel ch, IRow input, int iinfo) + private ValueGetter GetGetter(IChannel ch, IRow input, int iinfo) { Host.AssertValue(ch); ch.AssertValue(input); @@ -190,7 +190,7 @@ private ValueGetter GetGetter(IChannel ch, IRow input, int iinfo) uint cls = (uint)(_classIndex[iinfo] + 1); return - (ref DvBool dst) => + (ref bool dst) => { srcGetter(ref src); dst = src == cls; @@ -202,7 +202,7 @@ private ValueGetter GetGetter(IChannel ch, IRow input, int iinfo) var src = default(float); return - (ref DvBool dst) => + (ref bool dst) => { srcGetter(ref src); dst = src == _classIndex[iinfo]; @@ -214,7 +214,7 @@ private ValueGetter GetGetter(IChannel ch, IRow input, int iinfo) var src = default(double); return - (ref DvBool dst) => + (ref bool dst) => { 
srcGetter(ref src); dst = src == _classIndex[iinfo]; diff --git a/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs b/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs index 69021d30a3..b515b2293c 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs @@ -557,7 +557,7 @@ public override void AttachMetadata(MetadataDispatcher.Builder bldr, ColumnType Host.Check(typeSrc.RawType == typeof(TFloat)); bldr.AddPrimitive("CdfMean", typeSrc, Mean); bldr.AddPrimitive("CdfStdDev", typeSrc, Stddev); - bldr.AddPrimitive("CdfUseLog", BoolType.Instance, (DvBool)UseLog); + bldr.AddPrimitive("CdfUseLog", BoolType.Instance, UseLog); } } @@ -590,7 +590,7 @@ public override void AttachMetadata(MetadataDispatcher.Builder bldr, ColumnType Host.Check(typeSrc.ItemType.RawType == typeof(TFloat)); bldr.AddGetter>("CdfMean", typeSrc, MeanMetadataGetter); bldr.AddGetter>("CdfStdDev", typeSrc, StddevMetadataGetter); - bldr.AddPrimitive("CdfUseLog", BoolType.Instance, (DvBool)UseLog); + bldr.AddPrimitive("CdfUseLog", BoolType.Instance, UseLog); } private void MeanMetadataGetter(int col, ref VBuffer dst) diff --git a/src/Microsoft.ML.Data/Transforms/NormalizeUtils.cs b/src/Microsoft.ML.Data/Transforms/NormalizeUtils.cs index ab79ab4e11..4cb4a2892f 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizeUtils.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizeUtils.cs @@ -146,7 +146,6 @@ public static CommonOutputs.TransformOutput Bin(IHostEnvironment env, NormalizeT EntryPointNode node) { var schema = input.Data.Schema; - DvBool isNormalized = DvBool.False; var columnsToNormalize = new List(); foreach (var column in input.Column) { diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs index 8f7c25166b..7cf6bd3d31 100644 --- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs +++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs @@ -473,19 +473,19 @@ private ColumnMetadataInfo 
MakeMetadata(int iinfo) { var colInfo = _parent._columns[iinfo]; var result = new ColumnMetadataInfo(colInfo.Output); - result.Add(MetadataUtils.Kinds.IsNormalized, new MetadataInfo(BoolType.Instance, IsNormalizedGetter)); + result.Add(MetadataUtils.Kinds.IsNormalized, new MetadataInfo(BoolType.Instance, IsNormalizedGetter)); if (InputSchema.HasSlotNames(ColMapNewToOld[iinfo], colInfo.InputType.VectorSize)) { - MetadataUtils.MetadataGetter> getter = (int col, ref VBuffer slotNames) => + MetadataUtils.MetadataGetter>> getter = (int col, ref VBuffer> slotNames) => InputSchema.GetMetadata(MetadataUtils.Kinds.SlotNames, ColMapNewToOld[iinfo], ref slotNames); var metaType = InputSchema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, ColMapNewToOld[iinfo]); Contracts.AssertValue(metaType); - result.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>(metaType, getter)); + result.Add(MetadataUtils.Kinds.SlotNames, new MetadataInfo>>(metaType, getter)); } return result; } - private void IsNormalizedGetter(int col, ref DvBool dst) + private void IsNormalizedGetter(int col, ref bool dst) { dst = true; } diff --git a/src/Microsoft.ML.Data/Transforms/TermTransform.cs b/src/Microsoft.ML.Data/Transforms/TermTransform.cs index 70b8002be6..55db806941 100644 --- a/src/Microsoft.ML.Data/Transforms/TermTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/TermTransform.cs @@ -546,16 +546,16 @@ private static TermMap[] Train(IHostEnvironment env, IChannel ch, ColInfo[] info for (int iinfo = 0; iinfo < infos.Length; iinfo++) { // First check whether we have a terms argument, and handle it appropriately. - var terms = new DvText(columns[iinfo].Terms); + var terms = columns[iinfo].Terms.AsMemory(); var termsArray = columns[iinfo].Term; - terms = terms.Trim(); - if (terms.HasChars || (termsArray != null && termsArray.Length > 0)) + terms = ReadOnlyMemoryUtils.TrimSpaces(terms); + if (!terms.IsEmpty || (termsArray != null && termsArray.Length > 0)) { // We have terms! Pass it in. 
var sortOrder = columns[iinfo].Sort; var bldr = Builder.Create(infos[iinfo].TypeSrc, sortOrder); - if (terms.HasChars) + if (!terms.IsEmpty) bldr.ParseAddTermArg(ref terms, ch); else bldr.ParseAddTermArg(termsArray, ch); @@ -795,8 +795,8 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string src if (!info.TypeSrc.ItemType.IsText) return false; - var terms = default(VBuffer); - TermMap map = (TermMap)_termMap[iinfo].Map; + var terms = default(VBuffer>); + TermMap> map = (TermMap>)_termMap[iinfo].Map; map.GetTerms(ref terms); string opType = "LabelEncoder"; var node = ctx.CreateNode(opType, srcVariableName, dstVariableName, ctx.GetNodeName(opType)); @@ -870,8 +870,8 @@ private JToken SaveAsPfaCore(BoundPfaContext ctx, int iinfo, ColInfo info, JToke if (!info.TypeSrc.ItemType.IsText) return null; - var terms = default(VBuffer); - TermMap map = (TermMap)_termMap[iinfo].Map; + var terms = default(VBuffer>); + TermMap> map = (TermMap>)_termMap[iinfo].Map; map.GetTerms(ref terms); var jsonMap = new JObject(); foreach (var kv in terms.Items()) diff --git a/src/Microsoft.ML.Data/Transforms/TermTransformImpl.cs b/src/Microsoft.ML.Data/Transforms/TermTransformImpl.cs index fcc0155617..e94c457658 100644 --- a/src/Microsoft.ML.Data/Transforms/TermTransformImpl.cs +++ b/src/Microsoft.ML.Data/Transforms/TermTransformImpl.cs @@ -79,7 +79,7 @@ private static Builder CreateCore(PrimitiveType type, bool sorted) /// /// The input terms argument /// The channel against which to report errors and warnings - public abstract void ParseAddTermArg(ref DvText terms, IChannel ch); + public abstract void ParseAddTermArg(ref ReadOnlyMemory terms, IChannel ch); /// /// Handling for the "term" arg. 
@@ -88,7 +88,7 @@ private static Builder CreateCore(PrimitiveType type, bool sorted) /// The channel against which to report errors and warnings public abstract void ParseAddTermArg(string[] terms, IChannel ch); - private sealed class TextImpl : Builder + private sealed class TextImpl : Builder> { private readonly NormStr.Pool _pool; private readonly bool _sorted; @@ -105,12 +105,12 @@ public TextImpl(bool sorted) _sorted = sorted; } - public override bool TryAdd(ref DvText val) + public override bool TryAdd(ref ReadOnlyMemory val) { - if (!val.HasChars) + if (val.IsEmpty) return false; int count = _pool.Count; - return val.AddToPool(_pool).Id == count; + return ReadOnlyMemoryUtils.AddToPool(val, _pool).Id == count; } public override TermMap Finish() @@ -119,7 +119,7 @@ public override TermMap Finish() return new TermMap.TextImpl(_pool); // REVIEW: Should write a Sort method in NormStr.Pool to make sorting more memory efficient. var perm = Utils.GetIdentityPermutation(_pool.Count); - Comparison comp = (i, j) => _pool.GetNormStrById(i).Value.CompareTo(_pool.GetNormStrById(j).Value); + Comparison comp = (i, j) => _pool.GetNormStrById(i).Value.Span.CompareTo(_pool.GetNormStrById(j).Value.Span, StringComparison.Ordinal); Array.Sort(perm, comp); var sortedPool = new NormStr.Pool(); @@ -127,7 +127,7 @@ public override TermMap Finish() { var nstr = sortedPool.Add(_pool.GetNormStrById(perm[i]).Value); Contracts.Assert(nstr.Id == i); - Contracts.Assert(i == 0 || sortedPool.GetNormStrById(i - 1).Value.CompareTo(sortedPool.GetNormStrById(i).Value) < 0); + Contracts.Assert(i == 0 || sortedPool.GetNormStrById(i - 1).Value.Span.CompareTo(sortedPool.GetNormStrById(i).Value.Span, StringComparison.Ordinal) < 0); } Contracts.Assert(sortedPool.Count == _pool.Count); return new TermMap.TextImpl(sortedPool); @@ -201,16 +201,16 @@ protected Builder(PrimitiveType type) /// /// The input terms argument /// The channel against which to report errors and warnings - public override void 
ParseAddTermArg(ref DvText terms, IChannel ch) + public override void ParseAddTermArg(ref ReadOnlyMemory terms, IChannel ch) { T val; var tryParse = Conversion.Conversions.Instance.GetParseConversion(ItemType); for (bool more = true; more;) { - DvText term; - more = terms.SplitOne(',', out term, out terms); - term = term.Trim(); - if (!term.HasChars) + ReadOnlyMemory term; + more = ReadOnlyMemoryUtils.SplitOne(terms, ',', out term, out terms); + term = ReadOnlyMemoryUtils.TrimSpaces(term); + if (term.IsEmpty) ch.Warning("Empty strings ignored in 'terms' specification"); else if (!tryParse(ref term, out val)) ch.Warning("Item '{0}' ignored in 'terms' specification since it could not be parsed as '{1}'", term, ItemType); @@ -233,9 +233,9 @@ public override void ParseAddTermArg(string[] terms, IChannel ch) var tryParse = Conversion.Conversions.Instance.GetParseConversion(ItemType); foreach (var sterm in terms) { - DvText term = new DvText(sterm); - term = term.Trim(); - if (!term.HasChars) + ReadOnlyMemory term = sterm.AsMemory(); + term = ReadOnlyMemoryUtils.TrimSpaces(term); + if (term.IsEmpty) ch.Warning("Empty strings ignored in 'term' specification"); else if (!tryParse(ref term, out val)) ch.Warning("Item '{0}' ignored in 'term' specification since it could not be parsed as '{1}'", term, ItemType); @@ -569,7 +569,7 @@ private static TermMap LoadCodecCore(ModelLoadContext ctx, IExceptionContext public abstract void WriteTextTerms(TextWriter writer); - public sealed class TextImpl : TermMap + public sealed class TextImpl : TermMap> { private readonly NormStr.Pool _pool; @@ -631,35 +631,35 @@ internal override void Save(ModelSaveContext ctx, IHostEnvironment host, CodecFa } } - private void KeyMapper(ref DvText src, ref uint dst) + private void KeyMapper(ref ReadOnlyMemory src, ref uint dst) { - var nstr = src.FindInPool(_pool); + var nstr = ReadOnlyMemoryUtils.FindInPool(src, _pool); if (nstr == null) dst = 0; else dst = (uint)nstr.Id + 1; } - public override 
ValueMapper GetKeyMapper() + public override ValueMapper, uint> GetKeyMapper() { return KeyMapper; } - public override void GetTerms(ref VBuffer dst) + public override void GetTerms(ref VBuffer> dst) { - DvText[] values = dst.Values; + ReadOnlyMemory[] values = dst.Values; if (Utils.Size(values) < _pool.Count) - values = new DvText[_pool.Count]; + values = new ReadOnlyMemory[_pool.Count]; int slot = 0; foreach (var nstr in _pool) { Contracts.Assert(0 <= nstr.Id & nstr.Id < values.Length); Contracts.Assert(nstr.Id == slot); - values[nstr.Id] = new DvText(nstr.Value); + values[nstr.Id] = nstr.Value; slot++; } - dst = new VBuffer(_pool.Count, values, dst.Indices); + dst = new VBuffer>(_pool.Count, values, dst.Indices); } public override void WriteTextTerms(TextWriter writer) @@ -770,7 +770,7 @@ protected TermMap(PrimitiveType type, int count) public abstract void GetTerms(ref VBuffer dst); } - private static void GetTextTerms(ref VBuffer src, ValueMapper stringMapper, ref VBuffer dst) + private static void GetTextTerms(ref VBuffer src, ValueMapper stringMapper, ref VBuffer> dst) { // REVIEW: This convenience function is not optimized. For non-string // types, creating a whole bunch of string objects on the heap is one that is @@ -778,23 +778,23 @@ private static void GetTextTerms(ref VBuffer src, ValueMapper)); StringBuilder sb = null; - DvText[] values = dst.Values; + ReadOnlyMemory[] values = dst.Values; // We'd obviously have to adjust this a bit, if we ever had sparse metadata vectors. // The way the term map metadata getters are structured right now, this is impossible. 
Contracts.Assert(src.IsDense); if (Utils.Size(values) < src.Length) - values = new DvText[src.Length]; + values = new ReadOnlyMemory[src.Length]; for (int i = 0; i < src.Length; ++i) { stringMapper(ref src.Values[i], ref sb); - values[i] = new DvText(sb.ToString()); + values[i] = sb.ToString().AsMemory(); } - dst = new VBuffer(src.Length, values, dst.Indices); + dst = new VBuffer>(src.Length, values, dst.Indices); } /// @@ -1048,8 +1048,8 @@ public override void AddMetadata(ColumnMetadataInfo colMetaInfo) var conv = Conversion.Conversions.Instance; var stringMapper = conv.GetStringConversion(TypedMap.ItemType); - MetadataUtils.MetadataGetter> getter = - (int iinfo, ref VBuffer dst) => + MetadataUtils.MetadataGetter>> getter = + (int iinfo, ref VBuffer> dst) => { // No buffer sharing convenient here. VBuffer dstT = default(VBuffer); @@ -1057,7 +1057,7 @@ public override void AddMetadata(ColumnMetadataInfo colMetaInfo) GetTextTerms(ref dstT, stringMapper, ref dst); }; var columnType = new VectorType(TextType.Instance, TypedMap.OutputType.KeyCount); - var info = new MetadataInfo>(columnType, getter); + var info = new MetadataInfo>>(columnType, getter); colMetaInfo.Add(MetadataUtils.Kinds.KeyValues, info); } else @@ -1144,8 +1144,8 @@ private bool AddMetadataCore(ColumnType srcMetaType, ColumnMetadataInfo c if (IsTextMetadata && !srcMetaType.IsText) { var stringMapper = convInst.GetStringConversion(srcMetaType); - MetadataUtils.MetadataGetter> mgetter = - (int iinfo, ref VBuffer dst) => + MetadataUtils.MetadataGetter>> mgetter = + (int iinfo, ref VBuffer> dst) => { _host.Assert(iinfo == _iinfo); var tempMeta = default(VBuffer); @@ -1155,7 +1155,7 @@ private bool AddMetadataCore(ColumnType srcMetaType, ColumnMetadataInfo c _host.Assert(dst.Length == TypedMap.OutputType.KeyCount); }; var columnType = new VectorType(TextType.Instance, TypedMap.OutputType.KeyCount); - var info = new MetadataInfo>(columnType, mgetter); + var info = new MetadataInfo>>(columnType, mgetter); 
colMetaInfo.Add(MetadataUtils.Kinds.KeyValues, info); } else diff --git a/src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs b/src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs index b53c721f61..080bad4c51 100644 --- a/src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs +++ b/src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Collections.Generic; using System.IO; using System.Linq; @@ -188,7 +189,7 @@ public static IDataLoader LoadLoader(IHostEnvironment env, RepositoryReader rep, } /// - /// REVIEW: consider adding an overload that returns + /// REVIEW: consider adding an overload that returns of /// Loads optionally feature names from the repository directory. /// Returns false iff no stream was found for feature names, iff result is set to null. /// @@ -287,10 +288,10 @@ public static IEnumerable> LoadRoleMappingsOrNu using (var cursor = loader.GetRowCursor(c => true)) { - var roleGetter = cursor.GetGetter(0); - var colGetter = cursor.GetGetter(1); - var role = default(DvText); - var col = default(DvText); + var roleGetter = cursor.GetGetter>(0); + var colGetter = cursor.GetGetter>(1); + var role = default(ReadOnlyMemory); + var col = default(ReadOnlyMemory); while (cursor.MoveNext()) { roleGetter(ref role); diff --git a/src/Microsoft.ML.FastTree/BinFile/BinFinder.cs b/src/Microsoft.ML.FastTree/BinFile/BinFinder.cs index f72ba30977..4014afceba 100644 --- a/src/Microsoft.ML.FastTree/BinFile/BinFinder.cs +++ b/src/Microsoft.ML.FastTree/BinFile/BinFinder.cs @@ -66,7 +66,7 @@ private int FindDistinctCounts(ref VBuffer values, double[] distinctValu // list (that is, NaN is considered to be ordered "below" any other value for the purpose of // a sort, including negative infinity). So when checking if values contains no NaN values, it // suffices to check only the first item. 
- if (valArray[0].IsNA()) + if (double.IsNaN(valArray[0])) return -1; int idist = 0; // Index into the "distinct" arrays. if (!values.IsDense && valArray[0] > 0) diff --git a/src/Microsoft.ML.FastTree/Dataset/Dataset.cs b/src/Microsoft.ML.FastTree/Dataset/Dataset.cs index f31ce73a94..b1b24bd4a1 100644 --- a/src/Microsoft.ML.FastTree/Dataset/Dataset.cs +++ b/src/Microsoft.ML.FastTree/Dataset/Dataset.cs @@ -609,7 +609,7 @@ public int[][] GetAssignments(double[] fraction, int randomSeed, out int[][] ass for (int i = 0; i < numParts; ++i) { cumulative += fraction[i]; - thresh[i] = (int)(cumulative * Int32.MaxValue); + thresh[i] = (int)(cumulative * int.MaxValue); if (fraction[i] == 0.0) thresh[i]--; } diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index fedf642f2e..124e1117bd 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -993,7 +993,7 @@ public static DataConverter Create(RoleMappedData data, IHost host, Double[][] b return conv; } - protected void GetFeatureNames(RoleMappedData data, ref VBuffer names) + protected void GetFeatureNames(RoleMappedData data, ref VBuffer> names) { // The existing implementations will have verified this by the time this utility // function is called. @@ -1006,11 +1006,11 @@ protected void GetFeatureNames(RoleMappedData data, ref VBuffer names) if (sch.HasSlotNames(feat.Index, feat.Type.ValueCount)) sch.GetMetadata(MetadataUtils.Kinds.SlotNames, feat.Index, ref names); else - names = new VBuffer(feat.Type.ValueCount, 0, names.Values, names.Indices); + names = new VBuffer>(feat.Type.ValueCount, 0, names.Values, names.Indices); } #if !CORECLR - protected void GetFeatureIniContent(RoleMappedData data, ref VBuffer content) + protected void GetFeatureIniContent(RoleMappedData data, ref VBuffer> content) { // The existing implementations will have verified this by the time this utility // function is called. 
@@ -1022,7 +1022,7 @@ protected void GetFeatureIniContent(RoleMappedData data, ref VBuffer con var sch = data.Schema.Schema; var type = sch.GetMetadataTypeOrNull(BingBinLoader.IniContentMetadataKind, feat.Index); if (type == null || type.VectorSize != feat.Type.ValueCount || !type.IsVector || !type.ItemType.IsText) - content = new VBuffer(feat.Type.ValueCount, 0, content.Values, content.Indices); + content = new VBuffer>(feat.Type.ValueCount, 0, content.Values, content.Indices); else sch.GetMetadata(BingBinLoader.IniContentMetadataKind, feat.Index, ref content); } @@ -3192,7 +3192,7 @@ private IEnumerable> GetSortedFeatureGains(RoleMapp { var gainMap = new FeatureToGainMap(TrainedEnsemble.Trees.ToList(), normalize: true); - var names = default(VBuffer); + var names = default(VBuffer>); MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, NumFeatures, ref names); var ordered = gainMap.OrderByDescending(pair => pair.Value); Double max = ordered.FirstOrDefault().Value; @@ -3224,7 +3224,7 @@ private void SaveEnsembleAsCode(TextWriter writer, RoleMappedSchema schema) { Host.AssertValueOrNull(schema); - var names = default(VBuffer); + var names = default(VBuffer>); MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, NumFeatures, ref names); int i = 0; @@ -3244,13 +3244,13 @@ private void SaveEnsembleAsCode(TextWriter writer, RoleMappedSchema schema) /// /// Convert a single tree to code, called recursively /// - private void SaveTreeAsCode(RegressionTree tree, TextWriter writer, ref VBuffer names) + private void SaveTreeAsCode(RegressionTree tree, TextWriter writer, ref VBuffer> names) { ToCSharp(tree, writer, 0, ref names); } // converts a subtree into a C# expression - private void ToCSharp(RegressionTree tree, TextWriter writer, int node, ref VBuffer names) + private void ToCSharp(RegressionTree tree, TextWriter writer, int node, ref VBuffer> names) { if (node < 0) { @@ -3331,7 +3331,7 @@ public int GetLeaf(int treeId, ref 
VBuffer features, ref List path) public IRow GetSummaryIRowOrNull(RoleMappedSchema schema) { - var names = default(VBuffer); + var names = default(VBuffer>); MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, NumFeatures, ref names); var slotNamesCol = RowColumnUtils.GetColumn(MetadataUtils.Kinds.SlotNames, new VectorType(TextType.Instance, NumFeatures), ref names); diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index 6f1b387ce9..0deceb1aff 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -908,18 +908,17 @@ public void SaveAsText(TextWriter writer, RoleMappedSchema schema) // A useful test in this case would be a model trained with: // maml.exe train data=Samples\breast-cancer-withheader.txt loader=text{header+ col=Label:0 col=F1:1-4 col=F2:4 col=F3:5-*} // xf =expr{col=F2 expr=x:0.0} xf=concat{col=Features:F1,F2,F3} tr=gam out=bubba2.zip - // Write out the intercept writer.WriteLine("-1\tIntercept"); - var names = default(VBuffer); + var names = default(VBuffer>); MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, _inputLength, ref names); for (int internalIndex = 0; internalIndex < _numFeatures; internalIndex++) { int featureIndex = _featureMap[internalIndex]; var name = names.GetItemOrDefault(featureIndex); - writer.WriteLine(name.HasChars ? "{0}\t{1}" : "{0}\tFeature {0}", featureIndex, name); + writer.WriteLine(!name.IsEmpty ? "{0}\t{1}" : "{0}\tFeature {0}", featureIndex, name); } writer.WriteLine(); @@ -994,7 +993,7 @@ private sealed class Context private readonly GamPredictorBase _pred; private readonly RoleMappedData _data; - private readonly VBuffer _featNames; + private readonly VBuffer> _featNames; // The scores. private readonly float[] _scores; // The labels. 
@@ -1038,7 +1037,7 @@ public Context(IChannel ch, GamPredictorBase pred, RoleMappedData data, IEvaluat if (schema.Schema.HasSlotNames(schema.Feature.Index, len)) schema.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, schema.Feature.Index, ref _featNames); else - _featNames = VBufferUtils.CreateEmpty(len); + _featNames = VBufferUtils.CreateEmpty>(len); var numFeatures = _pred._binEffects.Length; _binDocsList = new List[numFeatures][]; diff --git a/src/Microsoft.ML.FastTree/TreeEnsemble/Ensemble.cs b/src/Microsoft.ML.FastTree/TreeEnsemble/Ensemble.cs index 12b4a3cc36..fedde0f2e8 100644 --- a/src/Microsoft.ML.FastTree/TreeEnsemble/Ensemble.cs +++ b/src/Microsoft.ML.FastTree/TreeEnsemble/Ensemble.cs @@ -397,8 +397,8 @@ public FeatureToGainMap(IList trees, bool normalize) /// public sealed class FeaturesToContentMap { - private readonly VBuffer _content; - private readonly VBuffer _names; + private readonly VBuffer> _content; + private readonly VBuffer> _names; public int Count => _names.Length; @@ -419,15 +419,15 @@ public FeaturesToContentMap(RoleMappedSchema schema) if (sch.HasSlotNames(feat.Index, feat.Type.ValueCount)) sch.GetMetadata(MetadataUtils.Kinds.SlotNames, feat.Index, ref _names); else - _names = VBufferUtils.CreateEmpty(feat.Type.ValueCount); + _names = VBufferUtils.CreateEmpty>(feat.Type.ValueCount); #if !CORECLR var type = sch.GetMetadataTypeOrNull(BingBinLoader.IniContentMetadataKind, feat.Index); if (type != null && type.IsVector && type.VectorSize == feat.Type.ValueCount && type.ItemType.IsText) sch.GetMetadata(BingBinLoader.IniContentMetadataKind, feat.Index, ref _content); else - _content = VBufferUtils.CreateEmpty(feat.Type.ValueCount); + _content = VBufferUtils.CreateEmpty>(feat.Type.ValueCount); #else - _content = VBufferUtils.CreateEmpty(feat.Type.ValueCount); + _content = VBufferUtils.CreateEmpty>(feat.Type.ValueCount); #endif Contracts.Assert(_names.Length == _content.Length); } @@ -435,15 +435,15 @@ public 
FeaturesToContentMap(RoleMappedSchema schema) public string GetName(int ifeat) { Contracts.Assert(0 <= ifeat && ifeat < Count); - DvText name = _names.GetItemOrDefault(ifeat); - return name.HasChars ? name.ToString() : string.Format("f{0}", ifeat); + ReadOnlyMemory name = _names.GetItemOrDefault(ifeat); + return !name.IsEmpty ? name.ToString() : string.Format("f{0}", ifeat); } public string GetContent(int ifeat) { Contracts.Assert(0 <= ifeat && ifeat < Count); - DvText content = _content.GetItemOrDefault(ifeat); - return content.HasChars ? content.ToString() : DatasetUtils.GetDefaultTransform(GetName(ifeat)); + ReadOnlyMemory content = _content.GetItemOrDefault(ifeat); + return !content.IsEmpty ? content.ToString() : DatasetUtils.GetDefaultTransform(GetName(ifeat)); } } } diff --git a/src/Microsoft.ML.FastTree/TreeEnsembleFeaturizer.cs b/src/Microsoft.ML.FastTree/TreeEnsembleFeaturizer.cs index 37b52ff806..fcd2550147 100644 --- a/src/Microsoft.ML.FastTree/TreeEnsembleFeaturizer.cs +++ b/src/Microsoft.ML.FastTree/TreeEnsembleFeaturizer.cs @@ -135,20 +135,20 @@ public void GetMetadata(string kind, int col, ref TValue value) _ectx.CheckParam(0 <= col && col < ColumnCount, nameof(col)); if ((col == PathIdx || col == LeafIdx) && kind == MetadataUtils.Kinds.IsNormalized) - MetadataUtils.Marshal(IsNormalized, col, ref value); + MetadataUtils.Marshal(IsNormalized, col, ref value); else if (kind == MetadataUtils.Kinds.SlotNames) { switch (col) { case TreeIdx: - MetadataUtils.Marshal, TValue>(_parent.GetTreeSlotNames, col, ref value); + MetadataUtils.Marshal>, TValue>(_parent.GetTreeSlotNames, col, ref value); break; case LeafIdx: - MetadataUtils.Marshal, TValue>(_parent.GetLeafSlotNames, col, ref value); + MetadataUtils.Marshal>, TValue>(_parent.GetLeafSlotNames, col, ref value); break; default: Contracts.Assert(col == PathIdx); - MetadataUtils.Marshal, TValue>(_parent.GetPathSlotNames, col, ref value); + MetadataUtils.Marshal>, TValue>(_parent.GetPathSlotNames, col, ref 
value); break; } } @@ -156,9 +156,9 @@ public void GetMetadata(string kind, int col, ref TValue value) throw _ectx.ExceptGetMetadata(); } - private void IsNormalized(int iinfo, ref DvBool dst) + private void IsNormalized(int iinfo, ref bool dst) { - dst = DvBool.True; + dst = true; } } @@ -478,48 +478,48 @@ private static int CountLeaves(FastTreePredictionWrapper ensemble) return totalLeafCount; } - private void GetTreeSlotNames(int col, ref VBuffer dst) + private void GetTreeSlotNames(int col, ref VBuffer> dst) { var numTrees = _ensemble.NumTrees; var names = dst.Values; if (Utils.Size(names) < numTrees) - names = new DvText[numTrees]; + names = new ReadOnlyMemory[numTrees]; for (int t = 0; t < numTrees; t++) - names[t] = new DvText(string.Format("Tree{0:000}", t)); + names[t] = string.Format("Tree{0:000}", t).AsMemory(); - dst = new VBuffer(numTrees, names, dst.Indices); + dst = new VBuffer>(numTrees, names, dst.Indices); } - private void GetLeafSlotNames(int col, ref VBuffer dst) + private void GetLeafSlotNames(int col, ref VBuffer> dst) { var numTrees = _ensemble.NumTrees; var names = dst.Values; if (Utils.Size(names) < _totalLeafCount) - names = new DvText[_totalLeafCount]; + names = new ReadOnlyMemory[_totalLeafCount]; int i = 0; int t = 0; foreach (var tree in _ensemble.GetTrees()) { for (int l = 0; l < tree.NumLeaves; l++) - names[i++] = new DvText(string.Format("Tree{0:000}Leaf{1:000}", t, l)); + names[i++] = string.Format("Tree{0:000}Leaf{1:000}", t, l).AsMemory(); t++; } _host.Assert(i == _totalLeafCount); - dst = new VBuffer(_totalLeafCount, names, dst.Indices); + dst = new VBuffer>(_totalLeafCount, names, dst.Indices); } - private void GetPathSlotNames(int col, ref VBuffer dst) + private void GetPathSlotNames(int col, ref VBuffer> dst) { var numTrees = _ensemble.NumTrees; var totalNodeCount = _totalLeafCount - numTrees; var names = dst.Values; if (Utils.Size(names) < totalNodeCount) - names = new DvText[totalNodeCount]; + names = new 
ReadOnlyMemory[totalNodeCount]; int i = 0; int t = 0; @@ -527,11 +527,11 @@ private void GetPathSlotNames(int col, ref VBuffer dst) { var numLeaves = tree.NumLeaves; for (int l = 0; l < tree.NumLeaves - 1; l++) - names[i++] = new DvText(string.Format("Tree{0:000}Node{1:000}", t, l)); + names[i++] = string.Format("Tree{0:000}Node{1:000}", t, l).AsMemory(); t++; } _host.Assert(i == totalNodeCount); - dst = new VBuffer(totalNodeCount, names, dst.Indices); + dst = new VBuffer>(totalNodeCount, names, dst.Indices); } public ISchemaBoundMapper Bind(IHostEnvironment env, RoleMappedSchema schema) diff --git a/src/Microsoft.ML.FastTree/Utils/PseudorandomFunction.cs b/src/Microsoft.ML.FastTree/Utils/PseudorandomFunction.cs index 43304b22f7..f94701856a 100644 --- a/src/Microsoft.ML.FastTree/Utils/PseudorandomFunction.cs +++ b/src/Microsoft.ML.FastTree/Utils/PseudorandomFunction.cs @@ -19,7 +19,7 @@ public sealed class PseudorandomFunction public PseudorandomFunction(Random rand) { - _data = _periodics.Select(x => Enumerable.Range(0, x).Select(y => rand.Next(-1, Int32.MaxValue) + 1).ToArray()).ToArray(); + _data = _periodics.Select(x => Enumerable.Range(0, x).Select(y => rand.Next(-1, int.MaxValue) + 1).ToArray()).ToArray(); } public int Apply(ulong seed) diff --git a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs index dbf2999657..cb7472633f 100644 --- a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs +++ b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs @@ -688,7 +688,7 @@ public static OlsLinearRegressionPredictor Create(IHostEnvironment env, ModelLoa public override void SaveSummary(TextWriter writer, RoleMappedSchema schema) { - var names = default(VBuffer); + var names = default(VBuffer>); MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, Weight.Length, ref names); writer.WriteLine("Ordinary Least Squares Model Summary"); @@ -706,7 +706,7 @@ public override void SaveSummary(TextWriter 
writer, RoleMappedSchema schema) for (int i = 0; i < coeffs.Length; i++) { var name = names.GetItemOrDefault(i); - writer.WriteLine(format, i, DvText.Identical(name, DvText.Empty) ? $"f{i}" : name.ToString(), + writer.WriteLine(format, i, name.IsEmpty ? $"f{i}" : name.ToString(), coeffs[i], _standardErrors[i + 1], _tValues[i + 1], _pValues[i + 1]); } } @@ -721,7 +721,7 @@ public override void SaveSummary(TextWriter writer, RoleMappedSchema schema) for (int i = 0; i < coeffs.Length; i++) { var name = names.GetItemOrDefault(i); - writer.WriteLine(format, i, DvText.Identical(name, DvText.Empty) ? $"f{i}" : name.ToString(), coeffs[i]); + writer.WriteLine(format, i, name.IsEmpty ? $"f{i}" : name.ToString(), coeffs[i]); } } } diff --git a/src/Microsoft.ML.ImageAnalytics/ImageLoaderTransform.cs b/src/Microsoft.ML.ImageAnalytics/ImageLoaderTransform.cs index b012d59d34..067cd30747 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageLoaderTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImageLoaderTransform.cs @@ -31,7 +31,7 @@ namespace Microsoft.ML.Runtime.ImageAnalytics { /// - /// Transform which takes one or many columns of type and loads them as + /// Transform which takes one or many columns of type ReadOnlyMemory and loads them as /// public sealed class ImageLoaderTransform : OneToOneTransformerBase { @@ -165,8 +165,8 @@ protected override Delegate MakeGetter(IRow input, int iinfo, out Action dispose Contracts.Assert(0 <= iinfo && iinfo < _parent.ColumnPairs.Length); disposer = null; - var getSrc = input.GetGetter(ColMapNewToOld[iinfo]); - DvText src = default; + var getSrc = input.GetGetter>(ColMapNewToOld[iinfo]); + ReadOnlyMemory src = default; ValueGetter del = (ref Bitmap dst) => { diff --git a/src/Microsoft.ML.Legacy/Data/TextLoader.cs b/src/Microsoft.ML.Legacy/Data/TextLoader.cs index 7d215c0a00..84ab83807c 100644 --- a/src/Microsoft.ML.Legacy/Data/TextLoader.cs +++ b/src/Microsoft.ML.Legacy/Data/TextLoader.cs @@ -160,19 +160,19 @@ private static bool 
TryGetDataKind(Type type, out DataKind kind) Contracts.AssertValue(type); // REVIEW: Make this more efficient. Should we have a global dictionary? - if (type == typeof(DvInt1) || type == typeof(sbyte)) + if (type == typeof(sbyte)) kind = DataKind.I1; else if (type == typeof(byte) || type == typeof(char)) kind = DataKind.U1; - else if (type == typeof(DvInt2) || type == typeof(short)) + else if (type == typeof(short)) kind = DataKind.I2; else if (type == typeof(ushort)) kind = DataKind.U2; - else if (type == typeof(DvInt4) || type == typeof(int)) + else if ( type == typeof(int)) kind = DataKind.I4; else if (type == typeof(uint)) kind = DataKind.U4; - else if (type == typeof(DvInt8) || type == typeof(long)) + else if (type == typeof(long)) kind = DataKind.I8; else if (type == typeof(ulong)) kind = DataKind.U8; @@ -180,15 +180,15 @@ private static bool TryGetDataKind(Type type, out DataKind kind) kind = DataKind.R4; else if (type == typeof(Double)) kind = DataKind.R8; - else if (type == typeof(DvText) || type == typeof(string)) + else if (type == typeof(ReadOnlyMemory) || type == typeof(string)) kind = DataKind.TX; - else if (type == typeof(DvBool) || type == typeof(bool)) + else if (type == typeof(bool)) kind = DataKind.BL; - else if (type == typeof(DvTimeSpan) || type == typeof(TimeSpan)) + else if (type == typeof(TimeSpan)) kind = DataKind.TS; - else if (type == typeof(DvDateTime) || type == typeof(DateTime)) + else if (type == typeof(DateTime)) kind = DataKind.DT; - else if (type == typeof(DvDateTimeZone) || type == typeof(TimeZoneInfo)) + else if (type == typeof(DateTimeOffset)) kind = DataKind.DZ; else if (type == typeof(UInt128)) kind = DataKind.UG; diff --git a/src/Microsoft.ML.Legacy/LearningPipelineDebugProxy.cs b/src/Microsoft.ML.Legacy/LearningPipelineDebugProxy.cs index 5df87ac098..c9b3dd20f8 100644 --- a/src/Microsoft.ML.Legacy/LearningPipelineDebugProxy.cs +++ b/src/Microsoft.ML.Legacy/LearningPipelineDebugProxy.cs @@ -92,11 +92,11 @@ private 
PipelineItemDebugColumn[] BuildColumns() var n = dataView.Schema.GetColumnType(colIndex).VectorSize; if (dataView.Schema.HasSlotNames(colIndex, n)) { - var slots = default(VBuffer); + var slots = default(VBuffer>); dataView.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, colIndex, ref slots); bool appendEllipse = false; - IEnumerable slotNames = slots.Items(true).Select(x => x.Value); + IEnumerable> slotNames = slots.Items(true).Select(x => x.Value); if (slots.Length > MaxSlotNamesToDisplay) { appendEllipse = true; @@ -175,7 +175,7 @@ private PipelineItemDebugRow[] BuildRows() var getters = DataViewUtils.PopulateGetterArray(cursor, colIndices); - var row = new DvText[colCount]; + var row = new ReadOnlyMemory[colCount]; while (cursor.MoveNext() && i < MaxDisplayRows) { for (int column = 0; column < colCount; column++) diff --git a/src/Microsoft.ML.Legacy/Models/ConfusionMatrix.cs b/src/Microsoft.ML.Legacy/Models/ConfusionMatrix.cs index 2c2fd64215..643d122f14 100644 --- a/src/Microsoft.ML.Legacy/Models/ConfusionMatrix.cs +++ b/src/Microsoft.ML.Legacy/Models/ConfusionMatrix.cs @@ -52,7 +52,7 @@ internal static List Create(IHostEnvironment env, IDataView con } IRowCursor cursor = confusionMatrix.GetRowCursor(col => col == countColumn); - var slots = default(VBuffer); + var slots = default(VBuffer>); confusionMatrix.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, countColumn, ref slots); string[] classNames = new string[slots.Count]; for (int i = 0; i < slots.Count; i++) diff --git a/src/Microsoft.ML.Legacy/PredictionModel.cs b/src/Microsoft.ML.Legacy/PredictionModel.cs index ae8c58995f..45c4738fe2 100644 --- a/src/Microsoft.ML.Legacy/PredictionModel.cs +++ b/src/Microsoft.ML.Legacy/PredictionModel.cs @@ -49,7 +49,7 @@ public bool TryGetScoreLabelNames(out string[] names, string scoreColumnName = D if (!schema.HasSlotNames(colIndex, expectedLabelCount)) return false; - VBuffer labels = default; + VBuffer> labels = default; 
schema.GetMetadata(MetadataUtils.Kinds.SlotNames, colIndex, ref labels); if (labels.Length != expectedLabelCount) diff --git a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CrossValidationMacro.cs b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CrossValidationMacro.cs index d018e0ce7f..dfe8a0887b 100644 --- a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CrossValidationMacro.cs +++ b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/CrossValidationMacro.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Collections.Generic; using System.Linq; using Microsoft.ML.Runtime; @@ -446,7 +447,7 @@ public static CombinedOutput CombineMetrics(IHostEnvironment env, CombineMetrics var dvBldr = new ArrayDataViewBuilder(env); var warn = $"Detected columns of variable length: {string.Join(", ", variableSizeVectorColumnNames)}." + $" Consider setting collateMetrics- for meaningful per-Folds results."; - dvBldr.AddColumn(MetricKinds.ColumnNames.WarningText, TextType.Instance, new DvText(warn)); + dvBldr.AddColumn(MetricKinds.ColumnNames.WarningText, TextType.Instance, warn.AsMemory()); warnings.Add(dvBldr.GetDataView()); } diff --git a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/FeatureCombiner.cs b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/FeatureCombiner.cs index 95548a0e8a..09dee2037b 100644 --- a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/FeatureCombiner.cs +++ b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/FeatureCombiner.cs @@ -121,7 +121,7 @@ private static string GetTerms(IDataView data, string colName) var type = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.KeyValues, col); if (type == null || !type.IsKnownSizeVector || !type.ItemType.IsText) return null; - var metadata = default(VBuffer); + var metadata = default(VBuffer>); schema.GetMetadata(MetadataUtils.Kinds.KeyValues, col, ref metadata); if (!metadata.IsDense) return null; @@ -130,7 +130,7 
@@ private static string GetTerms(IDataView data, string colName) for (int i = 0; i < metadata.Length; i++) { sb.Append(pre); - metadata.Values[i].AddToStringBuilder(sb); + sb.AppendMemory(metadata.Values[i]); pre = ","; } return sb.ToString(); diff --git a/src/Microsoft.ML.Onnx/OnnxNodeImpl.cs b/src/Microsoft.ML.Onnx/OnnxNodeImpl.cs index 9b30fd1d87..369de019fc 100644 --- a/src/Microsoft.ML.Onnx/OnnxNodeImpl.cs +++ b/src/Microsoft.ML.Onnx/OnnxNodeImpl.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Collections.Generic; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.UniversalModelFormat.Onnx; @@ -30,11 +31,11 @@ public override void AddAttribute(string argName, long value) => OnnxUtils.NodeAddAttributes(_node, argName, value); public override void AddAttribute(string argName, IEnumerable value) => OnnxUtils.NodeAddAttributes(_node, argName, value); - public override void AddAttribute(string argName, DvText value) + public override void AddAttribute(string argName, ReadOnlyMemory value) => OnnxUtils.NodeAddAttributes(_node, argName, value); public override void AddAttribute(string argName, string[] value) => OnnxUtils.NodeAddAttributes(_node, argName, value); - public override void AddAttribute(string argName, IEnumerable value) + public override void AddAttribute(string argName, IEnumerable> value) => OnnxUtils.NodeAddAttributes(_node, argName, value); public override void AddAttribute(string argName, IEnumerable value) => OnnxUtils.NodeAddAttributes(_node, argName, value); diff --git a/src/Microsoft.ML.Onnx/OnnxUtils.cs b/src/Microsoft.ML.Onnx/OnnxUtils.cs index 9605226846..9fe761f42e 100644 --- a/src/Microsoft.ML.Onnx/OnnxUtils.cs +++ b/src/Microsoft.ML.Onnx/OnnxUtils.cs @@ -8,6 +8,7 @@ using Google.Protobuf; using Microsoft.ML.Runtime.UniversalModelFormat.Onnx; using Microsoft.ML.Runtime.Data; +using System; 
namespace Microsoft.ML.Runtime.Model.Onnx { @@ -186,13 +187,13 @@ public static void NodeAddAttributes(NodeProto node, string argName, long value) public static void NodeAddAttributes(NodeProto node, string argName, IEnumerable value) => node.Attribute.Add(MakeAttribute(argName, value)); - public static void NodeAddAttributes(NodeProto node, string argName, DvText value) + public static void NodeAddAttributes(NodeProto node, string argName, ReadOnlyMemory value) => node.Attribute.Add(MakeAttribute(argName, StringToByteString(value))); public static void NodeAddAttributes(NodeProto node, string argName, string[] value) => node.Attribute.Add(MakeAttribute(argName, StringToByteString(value))); - public static void NodeAddAttributes(NodeProto node, string argName, IEnumerable value) + public static void NodeAddAttributes(NodeProto node, string argName, IEnumerable> value) => node.Attribute.Add(MakeAttribute(argName, StringToByteString(value))); public static void NodeAddAttributes(NodeProto node, string argName, IEnumerable value) @@ -210,8 +211,8 @@ public static void NodeAddAttributes(NodeProto node, string argName, IEnumerable public static void NodeAddAttributes(NodeProto node, string argName, bool value) => node.Attribute.Add(MakeAttribute(argName, value)); - private static ByteString StringToByteString(DvText str) => ByteString.CopyFrom(Encoding.UTF8.GetBytes(str.ToString())); - private static IEnumerable StringToByteString(IEnumerable str) + private static ByteString StringToByteString(ReadOnlyMemory str) => ByteString.CopyFrom(Encoding.UTF8.GetBytes(str.ToString())); + private static IEnumerable StringToByteString(IEnumerable> str) => str.Select(s => ByteString.CopyFrom(Encoding.UTF8.GetBytes(s.ToString()))); private static IEnumerable StringToByteString(IEnumerable str) @@ -252,7 +253,7 @@ public static ModelProto MakeModel(List nodes, string producerName, s model.Domain = domain; model.ProducerName = producerName; model.ProducerVersion = producerVersion; - 
model.IrVersion = (long)Version.IrVersion; + model.IrVersion = (long)UniversalModelFormat.Onnx.Version.IrVersion; model.ModelVersion = modelVersion; model.OpsetImport.Add(new OperatorSetIdProto() { Domain = "ai.onnx.ml", Version = 1 }); model.OpsetImport.Add(new OperatorSetIdProto() { Domain = "", Version = 7 }); diff --git a/src/Microsoft.ML.Parquet/ParquetLoader.cs b/src/Microsoft.ML.Parquet/ParquetLoader.cs index 503debae65..e131949138 100644 --- a/src/Microsoft.ML.Parquet/ParquetLoader.cs +++ b/src/Microsoft.ML.Parquet/ParquetLoader.cs @@ -358,7 +358,7 @@ private ColumnType ConvertFieldType(DataType parquetType) case DataType.Decimal: return NumberType.R8; case DataType.DateTimeOffset: - return DateTimeZoneType.Instance; + return DateTimeOffsetType.Instance; case DataType.Interval: return TimeSpanType.Instance; default: @@ -495,31 +495,31 @@ private Delegate CreateGetterDelegate(int col) switch (parquetType) { case DataType.Boolean: - return CreateGetterDelegateCore(col, _parquetConversions.Conv); + return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.Byte: return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.SignedByte: - return CreateGetterDelegateCore(col, _parquetConversions.Conv); + return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.UnsignedByte: return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.Short: - return CreateGetterDelegateCore(col, _parquetConversions.Conv); + return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.UnsignedShort: return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.Int16: - return CreateGetterDelegateCore(col, _parquetConversions.Conv); + return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.UnsignedInt16: return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.Int32: - return CreateGetterDelegateCore(col, _parquetConversions.Conv); + 
return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.Int64: - return CreateGetterDelegateCore(col, _parquetConversions.Conv); + return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.Int96: return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.ByteArray: return CreateGetterDelegateCore>(col, _parquetConversions.Conv); case DataType.String: - return CreateGetterDelegateCore(col, _parquetConversions.Conv); + return CreateGetterDelegateCore>(col, _parquetConversions.Conv); case DataType.Float: return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.Double: @@ -527,11 +527,11 @@ private Delegate CreateGetterDelegate(int col) case DataType.Decimal: return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.DateTimeOffset: - return CreateGetterDelegateCore(col, _parquetConversions.Conv); + return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.Interval: - return CreateGetterDelegateCore(col, _parquetConversions.Conv); + return CreateGetterDelegateCore(col, _parquetConversions.Conv); default: - return CreateGetterDelegateCore(col, _parquetConversions.Conv); + return CreateGetterDelegateCore>(col, _parquetConversions.Conv); } } @@ -678,17 +678,17 @@ public ParquetConversions(IChannel channel) public void Conv(ref byte[] src, ref VBuffer dst) => dst = src != null ? new VBuffer(src.Length, src) : new VBuffer(0, new byte[0]); - public void Conv(ref sbyte? src, ref DvInt1 dst) => dst = src ?? DvInt1.NA; + public void Conv(ref sbyte? src, ref sbyte dst) => dst = (sbyte)src; public void Conv(ref byte src, ref byte dst) => dst = src; - public void Conv(ref short? src, ref DvInt2 dst) => dst = src ?? DvInt2.NA; + public void Conv(ref short? src, ref short dst) => dst = (short)src; public void Conv(ref ushort src, ref ushort dst) => dst = src; - public void Conv(ref int? src, ref DvInt4 dst) => dst = src ?? DvInt4.NA; + public void Conv(ref int? 
src, ref int dst) => dst = (int)src; - public void Conv(ref long? src, ref DvInt8 dst) => dst = src ?? DvInt8.NA; + public void Conv(ref long? src, ref long dst) => dst = (long)src; public void Conv(ref float? src, ref Single dst) => dst = src ?? Single.NaN; @@ -696,13 +696,14 @@ public ParquetConversions(IChannel channel) public void Conv(ref decimal? src, ref Double dst) => dst = src != null ? Decimal.ToDouble((decimal)src) : Double.NaN; - public void Conv(ref string src, ref DvText dst) => dst = new DvText(src); + public void Conv(ref string src, ref ReadOnlyMemory dst) => dst = src.AsMemory(); - public void Conv(ref bool? src, ref DvBool dst) => dst = src ?? DvBool.NA; + //Behavior for NA values is undefined. + public void Conv(ref bool src, ref bool dst) => dst = src; - public void Conv(ref DateTimeOffset src, ref DvDateTimeZone dst) => dst = src; + public void Conv(ref DateTimeOffset src, ref DateTimeOffset dst) => dst = src; - public void Conv(ref IList src, ref DvText dst) => dst = new DvText(ConvertListToString(src)); + public void Conv(ref IList src, ref ReadOnlyMemory dst) => dst = ConvertListToString(src).AsMemory(); /// /// Converts a System.Numerics.BigInteger value to a UInt128 data type value. @@ -727,22 +728,13 @@ public void Conv(ref BigInteger src, ref UInt128 dst) } /// - /// Converts a Parquet Interval data type value to a DvTimeSpan data type value. + /// Converts a Parquet Interval data type value to a TimeSpan data type value. /// /// Parquet Interval value (int : months, int : days, int : milliseconds). - /// DvTimeSpan object. - public void Conv(ref Interval src, ref DvTimeSpan dst) + /// TimeSpan object. + public void Conv(ref Interval src, ref TimeSpan dst) { - try - { - dst = new DvTimeSpan(TimeSpan.FromDays(src.Months * 30 + src.Days) + TimeSpan.FromMilliseconds(src.Millis)); - } - catch (Exception ex) - { - // Handle TimeSpan OverflowException - _ch.Error("Cannot convert Inteval to DvTimeSpan. 
Exception : '{0}'", ex.Message); - dst = DvTimeSpan.NA; - } + dst = TimeSpan.FromDays(src.Months * 30 + src.Days) + TimeSpan.FromMilliseconds(src.Millis); } private string ConvertListToString(IList list) diff --git a/src/Microsoft.ML.PipelineInference/ColumnTypeInference.cs b/src/Microsoft.ML.PipelineInference/ColumnTypeInference.cs index ca51218c94..5c589c50bf 100644 --- a/src/Microsoft.ML.PipelineInference/ColumnTypeInference.cs +++ b/src/Microsoft.ML.PipelineInference/ColumnTypeInference.cs @@ -8,6 +8,7 @@ using System.Text.RegularExpressions; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Data.Conversion; +using Microsoft.ML.Runtime.Internal.Utilities; namespace Microsoft.ML.Runtime.PipelineInference { @@ -38,7 +39,7 @@ public Arguments() private class IntermediateColumn { - private readonly DvText[] _data; + private readonly ReadOnlyMemory[] _data; private readonly int _columnId; private PrimitiveType _suggestedType; private bool? _hasHeader; @@ -60,13 +61,13 @@ public bool? 
HasHeader set { _hasHeader = value; } } - public IntermediateColumn(DvText[] data, int columnId) + public IntermediateColumn(ReadOnlyMemory[] data, int columnId) { _data = data; _columnId = columnId; } - public DvText[] RawData { get { return _data; } } + public ReadOnlyMemory[] RawData { get { return _data; } } } public struct Column @@ -88,9 +89,9 @@ public struct InferenceResult public readonly Column[] Columns; public readonly bool HasHeader; public readonly bool IsSuccess; - public readonly DvText[][] Data; + public readonly ReadOnlyMemory[][] Data; - private InferenceResult(bool isSuccess, Column[] columns, bool hasHeader, DvText[][] data) + private InferenceResult(bool isSuccess, Column[] columns, bool hasHeader, ReadOnlyMemory[][] data) { IsSuccess = isSuccess; Columns = columns; @@ -98,7 +99,7 @@ private InferenceResult(bool isSuccess, Column[] columns, bool hasHeader, DvText Data = data; } - public static InferenceResult Success(Column[] columns, bool hasHeader, DvText[][] data) + public static InferenceResult Success(Column[] columns, bool hasHeader, ReadOnlyMemory[][] data) { return new InferenceResult(true, columns, hasHeader, data); } @@ -132,7 +133,7 @@ public void Apply(IntermediateColumn[] columns) if (!col.RawData.Skip(1) .All(x => { - DvBool value; + bool value; return Conversions.Instance.TryParse(ref x, out value); }) ) @@ -141,7 +142,7 @@ public void Apply(IntermediateColumn[] columns) } col.SuggestedType = BoolType.Instance; - DvBool first; + bool first; col.HasHeader = !Conversions.Instance.TryParse(ref col.RawData[0], out first); } @@ -168,7 +169,7 @@ public void Apply(IntermediateColumn[] columns) col.SuggestedType = NumberType.R4; Single first; - col.HasHeader = !col.RawData[0].TryParse(out first); + col.HasHeader = !DoubleParser.TryParse(col.RawData[0].Span, out first); } } } @@ -187,7 +188,7 @@ public void Apply(IntermediateColumn[] columns) } } - private bool? IsLookLikeHeader(DvText value) + private bool? 
IsLookLikeHeader(ReadOnlyMemory value) { var v = value.ToString(); if (v.Length > 100) @@ -264,7 +265,7 @@ private static InferenceResult InferTextFileColumnTypesCore(IHostEnvironment env // Read all the data into memory. // List items are rows of the dataset. - var data = new List(); + var data = new List[]>(); using (var cursor = idv.GetRowCursor(col => true)) { int columnIndex; @@ -272,26 +273,26 @@ private static InferenceResult InferTextFileColumnTypesCore(IHostEnvironment env Contracts.Assert(found); var colType = cursor.Schema.GetColumnType(columnIndex); Contracts.Assert(colType.ItemType.IsText); - ValueGetter> vecGetter = null; - ValueGetter oneGetter = null; + ValueGetter>> vecGetter = null; + ValueGetter> oneGetter = null; bool isVector = colType.IsVector; if (isVector) - vecGetter = cursor.GetGetter>(columnIndex); + vecGetter = cursor.GetGetter>>(columnIndex); else { Contracts.Assert(args.ColumnCount == 1); - oneGetter = cursor.GetGetter(columnIndex); + oneGetter = cursor.GetGetter>(columnIndex); } - VBuffer line = default(VBuffer); - DvText tsValue = default(DvText); + VBuffer> line = default; + ReadOnlyMemory tsValue = default; while (cursor.MoveNext()) { if (isVector) { vecGetter(ref line); Contracts.Assert(line.Length == args.ColumnCount); - var values = new DvText[args.ColumnCount]; + var values = new ReadOnlyMemory[args.ColumnCount]; line.CopyTo(values); data.Add(values); } diff --git a/src/Microsoft.ML.PipelineInference/DatasetFeaturesInference.cs b/src/Microsoft.ML.PipelineInference/DatasetFeaturesInference.cs index 45decbb88b..ae4cb42bfc 100644 --- a/src/Microsoft.ML.PipelineInference/DatasetFeaturesInference.cs +++ b/src/Microsoft.ML.PipelineInference/DatasetFeaturesInference.cs @@ -110,14 +110,14 @@ public Column(string name, ColumnPurpose purpose, DataKind? dataKind, string ran public sealed class Arguments { - public readonly DvText[][] Data; + public readonly ReadOnlyMemory[][] Data; public readonly Column[] Columns; public readonly long? 
ApproximateRowCount; public readonly long? FullFileSize; public readonly bool InferencedSchema; public readonly Guid Id; public readonly bool PrettyPrint; - public Arguments(DvText[][] data, Column[] columns, long? fullFileSize, + public Arguments(ReadOnlyMemory[][] data, Column[] columns, long? fullFileSize, long? approximateRowCount, bool inferencedSchema, Guid id, bool prettyPrint = false) { Data = data; @@ -132,7 +132,7 @@ public Arguments(DvText[][] data, Column[] columns, long? fullFileSize, private interface ITypeInferenceExpert { - void Apply(DvText[][] data, Column[] columns); + void Apply(ReadOnlyMemory[][] data, Column[] columns); bool AddMe(); string FeatureName(); } @@ -175,7 +175,7 @@ public ColumnSchema() public string FeatureName() => nameof(ColumnSchema); - public void Apply(DvText[][] data, Column[] columns) + public void Apply(ReadOnlyMemory[][] data, Column[] columns) { Columns = columns; foreach (var column in columns) @@ -245,7 +245,7 @@ public LabelFeatures() LabelFeature = new List(); } - private void ApplyCore(DvText[][] data, Column column) + private void ApplyCore(ReadOnlyMemory[][] data, Column column) { _containsLabelColumns = true; Dictionary histogram = new Dictionary(); @@ -261,12 +261,6 @@ private void ApplyCore(DvText[][] data, Column column) Contracts.Check(data[index].Length > i); - if (data[index][i].IsNA) - { - missingValues++; - continue; - } - label += data[index][i].ToString(); } @@ -288,7 +282,7 @@ private void ApplyCore(DvText[][] data, Column column) }); } - public void Apply(DvText[][] data, Column[] columns) + public void Apply(ReadOnlyMemory[][] data, Column[] columns) { foreach (var column in columns.Where(col => col.ColumnPurpose == ColumnPurpose.Label)) ApplyCore(data, column); @@ -311,7 +305,7 @@ public sealed class MissingValues : ITypeInferenceExpert public int NumberOfFeaturesWithMissingValues; public double PercentageOfFeaturesWithMissingValues; - public void Apply(DvText[][] data, Column[] columns) + public 
void Apply(ReadOnlyMemory[][] data, Column[] columns) { if (data.GetLength(0) == 0) return; @@ -331,16 +325,6 @@ public void Apply(DvText[][] data, Column[] columns) break; Contracts.Check(data[index].Length > i); - - if (data[index][i].IsNA) - { - NumberOfMissingValues++; - instanceWithMissingValue = true; - if (column.ColumnPurpose == ColumnPurpose.TextFeature || - column.ColumnPurpose == ColumnPurpose.NumericFeature || - column.ColumnPurpose == ColumnPurpose.CategoricalFeature) - featuresWithMissingValues.Set(index, true); - } } } @@ -388,7 +372,7 @@ public ColumnFeatures() StatsPerColumnPurposeWithSpaces = new Dictionary(); } - private void ApplyCore(DvText[][] data, Column column) + private void ApplyCore(ReadOnlyMemory[][] data, Column column) { bool numericColumn = CmdParser.IsNumericType(column.Kind?.ToType()); //Statistics for numeric column or length of the text in the case of non-numeric column. @@ -401,11 +385,8 @@ private void ApplyCore(DvText[][] data, Column column) if (index >= data.GetLength(0)) break; - foreach (DvText value in data[index]) + foreach (ReadOnlyMemory value in data[index]) { - if (value.IsNA) - continue; - string columnPurposeString = column.Purpose; Stats statsPerPurpose; Stats statsPerPurposeSpaces; @@ -452,7 +433,7 @@ private void ApplyCore(DvText[][] data, Column column) } } - public void Apply(DvText[][] data, Column[] columns) + public void Apply(ReadOnlyMemory[][] data, Column[] columns) { foreach (var column in columns) ApplyCore(data, column); diff --git a/src/Microsoft.ML.PipelineInference/InferenceUtils.cs b/src/Microsoft.ML.PipelineInference/InferenceUtils.cs index a9527b9504..478e732b9f 100644 --- a/src/Microsoft.ML.PipelineInference/InferenceUtils.cs +++ b/src/Microsoft.ML.PipelineInference/InferenceUtils.cs @@ -52,14 +52,21 @@ public static Type InferPredictorCategoryType(IDataView data, PurposeInference.C data = data.Take(1000); using (var cursor = data.GetRowCursor(index => index == label.ColumnIndex)) { - 
ValueGetter getter = DataViewUtils.PopulateGetterArray(cursor, new List { label.ColumnIndex })[0]; + ValueGetter> getter = DataViewUtils.PopulateGetterArray(cursor, new List { label.ColumnIndex })[0]; while (cursor.MoveNext()) { - var currentLabel = new DvText(); + var currentLabel = default(ReadOnlyMemory); getter(ref currentLabel); string currentLabelString = currentLabel.ToString(); if (!String.IsNullOrEmpty(currentLabelString) && !uniqueLabelValues.Contains(currentLabelString)) + { + //Missing values in float and doubles are converted to "NaN" in text and they should not + //be treated as label values. + if ((label.ItemKind == DataKind.R4 || label.ItemKind == DataKind.R8) && currentLabelString == "?") + continue; + uniqueLabelValues.Add(currentLabelString); + } } } diff --git a/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs b/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs index 60137b5b4b..bfd9f30414 100644 --- a/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs +++ b/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System; using System.Collections.Generic; using System.Linq; using Microsoft.ML.Runtime; @@ -101,12 +102,12 @@ public static Output ExtractSweepResult(IHostEnvironment env, ResultInput input) else { var builder = new ArrayDataViewBuilder(env); - builder.AddColumn(col1.Key, (PrimitiveType)col1.Value, rows.Select(r => new DvText(r.GraphJson)).ToArray()); + builder.AddColumn(col1.Key, (PrimitiveType)col1.Value, rows.Select(r => r.GraphJson.AsMemory()).ToArray()); builder.AddColumn(col2.Key, (PrimitiveType)col2.Value, rows.Select(r => r.MetricValue).ToArray()); - builder.AddColumn(col3.Key, (PrimitiveType)col3.Value, rows.Select(r => new DvText(r.PipelineId)).ToArray()); + builder.AddColumn(col3.Key, (PrimitiveType)col3.Value, rows.Select(r => r.PipelineId.AsMemory()).ToArray()); builder.AddColumn(col4.Key, (PrimitiveType)col4.Value, rows.Select(r => r.TrainingMetricValue).ToArray()); - builder.AddColumn(col5.Key, (PrimitiveType)col5.Value, rows.Select(r => new DvText(r.FirstInput)).ToArray()); - builder.AddColumn(col6.Key, (PrimitiveType)col6.Value, rows.Select(r => new DvText(r.PredictorModel)).ToArray()); + builder.AddColumn(col5.Key, (PrimitiveType)col5.Value, rows.Select(r => r.FirstInput.AsMemory()).ToArray()); + builder.AddColumn(col6.Key, (PrimitiveType)col6.Value, rows.Select(r => r.PredictorModel.AsMemory()).ToArray()); outputView = builder.GetDataView(); } return new Output { Results = outputView, State = autoMlState }; diff --git a/src/Microsoft.ML.PipelineInference/PipelinePattern.cs b/src/Microsoft.ML.PipelineInference/PipelinePattern.cs index 8a279bb579..0984d3fef5 100644 --- a/src/Microsoft.ML.PipelineInference/PipelinePattern.cs +++ b/src/Microsoft.ML.PipelineInference/PipelinePattern.cs @@ -239,17 +239,17 @@ public static PipelineResultRow[] ExtractResults(IHostEnvironment env, IDataView using (var cursor = data.GetRowCursor(col => true)) { var getter1 = cursor.GetGetter(metricCol); - var getter2 = cursor.GetGetter(graphCol); - var getter3 = 
cursor.GetGetter(pipelineIdCol); + var getter2 = cursor.GetGetter>(graphCol); + var getter3 = cursor.GetGetter>(pipelineIdCol); var getter4 = cursor.GetGetter(trainingMetricCol); - var getter5 = cursor.GetGetter(firstInputCol); - var getter6 = cursor.GetGetter(predictorModelCol); + var getter5 = cursor.GetGetter>(firstInputCol); + var getter6 = cursor.GetGetter>(predictorModelCol); double metricValue = 0; double trainingMetricValue = 0; - DvText graphJson = new DvText(); - DvText pipelineId = new DvText(); - DvText firstInput = new DvText(); - DvText predictorModel = new DvText(); + ReadOnlyMemory graphJson = default; + ReadOnlyMemory pipelineId = default; + ReadOnlyMemory firstInput = default; + ReadOnlyMemory predictorModel = default; while (cursor.MoveNext()) { diff --git a/src/Microsoft.ML.PipelineInference/PurposeInference.cs b/src/Microsoft.ML.PipelineInference/PurposeInference.cs index 8e7c32084e..b2f57328a8 100644 --- a/src/Microsoft.ML.PipelineInference/PurposeInference.cs +++ b/src/Microsoft.ML.PipelineInference/PurposeInference.cs @@ -7,6 +7,7 @@ using System.Linq; using System.Text.RegularExpressions; using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Internal.Utilities; namespace Microsoft.ML.Runtime.PipelineInference { @@ -172,7 +173,7 @@ public void Apply(IChannel ch, IntermediateColumn[] columns) { if (column.IsPurposeSuggested || !column.Type.IsText) continue; - var data = column.GetData(); + var data = column.GetData>(); long sumLength = 0; int sumSpaces = 0; @@ -181,7 +182,7 @@ public void Apply(IChannel ch, IntermediateColumn[] columns) foreach (var span in data) { sumLength += span.Length; - seen.Add(span.IsNA ? 
0 : span.Hash(0)); + seen.Add(Hashing.MurmurHash(0, span.Span)); string spanStr = span.ToString(); sumSpaces += spanStr.Count(x => x == ' '); diff --git a/src/Microsoft.ML.PipelineInference/RecipeInference.cs b/src/Microsoft.ML.PipelineInference/RecipeInference.cs index e4e6c083a0..67f3a7f02d 100644 --- a/src/Microsoft.ML.PipelineInference/RecipeInference.cs +++ b/src/Microsoft.ML.PipelineInference/RecipeInference.cs @@ -202,7 +202,7 @@ protected override IEnumerable ApplyCore(Type predictorType, TransformInference.SuggestedTransform[] transforms) { yield return - new SuggestedRecipe(ToString(), transforms, new SuggestedRecipe.SuggestedLearner[0], Int32.MinValue + 1); + new SuggestedRecipe(ToString(), transforms, new SuggestedRecipe.SuggestedLearner[0], int.MinValue + 1); } public override string ToString() => "Default transforms"; @@ -251,7 +251,7 @@ protected override IEnumerable ApplyCore(Type predictorType, } yield return - new SuggestedRecipe(ToString(), transforms, new[] { learner }, Int32.MaxValue); + new SuggestedRecipe(ToString(), transforms, new[] { learner }, int.MaxValue); } public override string ToString() => "Text classification optimized for speed and accuracy"; diff --git a/src/Microsoft.ML.PipelineInference/TextFileContents.cs b/src/Microsoft.ML.PipelineInference/TextFileContents.cs index aeb72ee0c9..2ba5e23d0b 100644 --- a/src/Microsoft.ML.PipelineInference/TextFileContents.cs +++ b/src/Microsoft.ML.PipelineInference/TextFileContents.cs @@ -131,9 +131,9 @@ private static bool TryParseFile(IChannel ch, TextLoader.Arguments args, IMultiS using (var cursor = idv.GetRowCursor(x => x == columnIndex)) { - var getter = cursor.GetGetter>(columnIndex); + var getter = cursor.GetGetter>>(columnIndex); - VBuffer line = default(VBuffer); + VBuffer> line = default; while (cursor.MoveNext()) { getter(ref line); diff --git a/src/Microsoft.ML.PipelineInference/TransformInference.cs b/src/Microsoft.ML.PipelineInference/TransformInference.cs index 
6cfdc58b1b..477e566060 100644 --- a/src/Microsoft.ML.PipelineInference/TransformInference.cs +++ b/src/Microsoft.ML.PipelineInference/TransformInference.cs @@ -364,7 +364,7 @@ public override IEnumerable Apply(IntermediateColumn[] colum if (col.Type.IsText) { - col.GetUniqueValueCounts(out var unique, out var _, out var _); + col.GetUniqueValueCounts>(out var unique, out var _, out var _); ch.Info("Label column '{0}' is text. Suggested auto-labeling.", col.ColumnName); var args = new TransformString("AutoLabel", columnArgument.ToString()); @@ -695,7 +695,7 @@ private bool IsDictionaryOk(IntermediateColumn column, Double dataSampleFraction // Sparse Data for the Language Model Component of a Speech Recognizer" (1987), taking into account that // the singleton count was estimated from a fraction of the data (and assuming the estimate is // roughly the same for the entire sample). - column.GetUniqueValueCounts(out unique, out singletons, out total); + column.GetUniqueValueCounts>(out unique, out singletons, out total); var expectedUnseenValues = singletons / dataSampleFraction; return expectedUnseenValues < 1000 && unique < 10000; } diff --git a/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs b/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs index 88fcd47531..31a9199093 100644 --- a/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs +++ b/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs @@ -645,7 +645,7 @@ public void Minimize(DifferentiableFunction function, ref VBuffer initial double? 
improvement = null; double x; int end; - if (message != null && DoubleParser.TryParse(out x, message, 0, message.Length, out end)) + if (message != null && DoubleParser.TryParse(message.AsMemory().Span, out x, out end)) improvement = x; pch.Checkpoint(state.Value, improvement, state.Iter); diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearPredictor.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearPredictor.cs index 2a5d73705f..eb0644d2da 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LinearPredictor.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LinearPredictor.cs @@ -347,7 +347,7 @@ public virtual IRow GetSummaryIRowOrNull(RoleMappedSchema schema) { var cols = new List(); - var names = default(VBuffer); + var names = default(VBuffer>); MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, Weight.Length, ref names); var slotNamesCol = RowColumnUtils.GetColumn(MetadataUtils.Kinds.SlotNames, new VectorType(TextType.Instance, Weight.Length), ref names); @@ -529,7 +529,7 @@ public override IRow GetStatsIRowOrNull(RoleMappedSchema schema) if (_stats == null) return null; var cols = new List(); - var names = default(VBuffer); + var names = default(VBuffer>); MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, Weight.Length, ref names); // Add the stat columns. 
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearPredictorUtils.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearPredictorUtils.cs index c337abccfc..505d3363e3 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LinearPredictorUtils.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LinearPredictorUtils.cs @@ -35,7 +35,7 @@ public static void SaveAsCode(TextWriter writer, ref VBuffer weights, Flo Contracts.CheckValue(writer, nameof(writer)); Contracts.CheckValueOrNull(schema); - var featureNames = default(VBuffer); + var featureNames = default(VBuffer>); MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weights.Length, ref featureNames); int numNonZeroWeights = 0; @@ -103,7 +103,7 @@ public static string LinearModelAsIni(ref VBuffer weights, Float bias, IP StringBuilder aggregatedNodesBuilder = new StringBuilder("Nodes="); StringBuilder weightsBuilder = new StringBuilder("Weights="); - var featureNames = default(VBuffer); + var featureNames = default(VBuffer>); MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weights.Length, ref featureNames); int numNonZeroWeights = 0; @@ -118,7 +118,7 @@ public static string LinearModelAsIni(ref VBuffer weights, Float bias, IP var name = featureNames.GetItemOrDefault(idx); inputBuilder.AppendLine("[Input:" + numNonZeroWeights + "]"); - inputBuilder.AppendLine("Name=" + (featureNames.Count == 0 ? "Feature_" + idx : DvText.Identical(name, DvText.Empty) ? $"f{idx}" : name.ToString())); + inputBuilder.AppendLine("Name=" + (featureNames.Count == 0 ? "Feature_" + idx : name.IsEmpty ? 
$"f{idx}" : name.ToString())); inputBuilder.AppendLine("Transform=linear"); inputBuilder.AppendLine("Slope=1"); inputBuilder.AppendLine("Intercept=0"); @@ -206,7 +206,7 @@ public static string LinearModelAsText( } public static IEnumerable> GetSortedLinearModelFeatureNamesAndWeights(Single bias, - ref VBuffer weights, ref VBuffer names) + ref VBuffer weights, ref VBuffer> names) { var orderedWeights = weights.Items() .Where(weight => Math.Abs(weight.Value) >= Epsilon) @@ -217,8 +217,7 @@ public static IEnumerable> GetSortedLinearModelFeat { int index = weight.Key; var name = names.GetItemOrDefault(index); - list.Add(new KeyValuePair( - DvText.Identical(name, DvText.Empty) ? $"f{index}" : name.ToString(), weight.Value)); + list.Add(new KeyValuePair(name.IsEmpty ? $"f{index}" : name.ToString(), weight.Value)); } return list; @@ -230,7 +229,7 @@ public static IEnumerable> GetSortedLinearModelFeat public static void SaveLinearModelWeightsInKeyValuePairs( ref VBuffer weights, Float bias, RoleMappedSchema schema, List> results) { - var names = default(VBuffer); + var names = default(VBuffer>); MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weights.Length, ref names); var pairs = GetSortedLinearModelFeatureNamesAndWeights(bias, ref weights, ref names); diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index 47b08c586a..d4c265962e 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -141,7 +141,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. 
var featureColIdx = cursorFactory.Data.Schema.Feature.Index; var schema = cursorFactory.Data.Data.Schema; var featureLength = CurrentWeights.Length - BiasCount; - var namesSpans = VBufferUtils.CreateEmpty(featureLength); + var namesSpans = VBufferUtils.CreateEmpty>(featureLength); if (schema.HasSlotNames(featureColIdx, featureLength)) schema.GetMetadata(MetadataUtils.Kinds.SlotNames, featureColIdx, ref namesSpans); Host.Assert(namesSpans.Length == featureLength); diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index ca850fe46e..b8986cce77 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -98,7 +98,7 @@ protected override void CheckLabel(RoleMappedData data) return; } - VBuffer labelNames = default(VBuffer); + VBuffer> labelNames = default; schema.GetMetadata(MetadataUtils.Kinds.KeyValues, labelIdx, ref labelNames); // If label names is not dense or contain NA or default value, then it follows that @@ -113,14 +113,14 @@ protected override void CheckLabel(RoleMappedData data) } _labelNames = new string[_numClasses]; - DvText[] values = labelNames.Values; + ReadOnlyMemory[] values = labelNames.Values; // This hashset is used to verify the uniqueness of label names. 
HashSet labelNamesSet = new HashSet(); for (int i = 0; i < _numClasses; i++) { - DvText value = values[i]; - if (value.IsEmpty || value.IsNA) + ReadOnlyMemory value = values[i]; + if (value.IsEmpty) { _labelNames = null; break; @@ -754,7 +754,7 @@ public IList> GetSummaryInKeyValuePairs(RoleMappedS List> results = new List>(); - var names = default(VBuffer); + var names = default(VBuffer>); MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, _numFeatures, ref names); for (int classNumber = 0; classNumber < _biases.Length; classNumber++) { @@ -776,7 +776,7 @@ public IList> GetSummaryInKeyValuePairs(RoleMappedS var name = names.GetItemOrDefault(index); results.Add(new KeyValuePair( - string.Format("{0}+{1}", GetLabelName(classNumber), DvText.Identical(name, DvText.Empty) ? $"f{index}" : name.ToString()), + string.Format("{0}+{1}", GetLabelName(classNumber), name.IsEmpty ? $"f{index}" : name.ToString()), value )); } @@ -927,8 +927,8 @@ public IDataView GetSummaryDataView(RoleMappedSchema schema) { var bldr = new ArrayDataViewBuilder(Host); - ValueGetter> getSlotNames = - (ref VBuffer dst) => + ValueGetter>> getSlotNames = + (ref VBuffer> dst) => MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, _numFeatures, ref dst); // Add the bias and the weight columns. 
@@ -949,7 +949,7 @@ public IRow GetStatsIRowOrNull(RoleMappedSchema schema) return null; var cols = new List(); - var names = default(VBuffer); + var names = default(VBuffer>); _stats.AddStatsColumns(cols, null, schema, ref names); return RowColumnUtils.GetRow(null, cols.ToArray()); } diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs index 91874291b0..e1594041f8 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs @@ -225,8 +225,8 @@ public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias return true; } - private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stats, ref VBuffer weights, ref VBuffer names, - ref VBuffer estimate, ref VBuffer stdErr, ref VBuffer zScore, ref VBuffer pValue, out ValueGetter> getSlotNames) + private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stats, ref VBuffer weights, ref VBuffer> names, + ref VBuffer estimate, ref VBuffer stdErr, ref VBuffer zScore, ref VBuffer pValue, out ValueGetter>> getSlotNames) { if (!stats._coeffStdError.HasValue) { @@ -270,17 +270,17 @@ private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stat var slotNames = names; getSlotNames = - (ref VBuffer dst) => + (ref VBuffer> dst) => { var values = dst.Values; if (Utils.Size(values) < stats.ParametersCount - 1) - values = new DvText[stats.ParametersCount - 1]; + values = new ReadOnlyMemory[stats.ParametersCount - 1]; for (int i = 1; i < stats.ParametersCount; i++) { int wi = denseStdError ? 
i - 1 : stdErrorIndices[i] - 1; values[i - 1] = slotNames.GetItemOrDefault(wi); } - dst = new VBuffer(stats.ParametersCount - 1, values, dst.Indices); + dst = new VBuffer>(stats.ParametersCount - 1, values, dst.Indices); }; } @@ -296,7 +296,7 @@ private IEnumerable GetUnorderedCoefficientStatistics(Lin _env.Assert(_paramCount == 1 || weights != null); _env.Assert(_coeffStdError.Value.Length == weights.Count + 1); - var names = default(VBuffer); + var names = default(VBuffer>); MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weights.Count, ref names); Single[] stdErrorValues = _coeffStdError.Value.Values; @@ -408,13 +408,13 @@ public void SaveSummaryInKeyValuePairs(LinearBinaryPredictor parent, } } - public void AddStatsColumns(List list, LinearBinaryPredictor parent, RoleMappedSchema schema, ref VBuffer names) + public void AddStatsColumns(List list, LinearBinaryPredictor parent, RoleMappedSchema schema, ref VBuffer> names) { _env.AssertValue(list); _env.AssertValueOrNull(parent); _env.AssertValue(schema); - DvInt8 count = _trainingExampleCount; + long count = _trainingExampleCount; list.Add(RowColumnUtils.GetColumn("Count of training examples", NumberType.I8, ref count)); var dev = _deviance; list.Add(RowColumnUtils.GetColumn("Residual Deviance", NumberType.R4, ref dev)); @@ -444,7 +444,7 @@ public void AddStatsColumns(List list, LinearBinaryPredictor parent, Ro var stdErr = default(VBuffer); var zScore = default(VBuffer); var pValue = default(VBuffer); - ValueGetter> getSlotNames; + ValueGetter>> getSlotNames; GetUnorderedCoefficientStatistics(parent.Statistics, ref weights, ref names, ref estimate, ref stdErr, ref zScore, ref pValue, out getSlotNames); var slotNamesCol = RowColumnUtils.GetColumn(MetadataUtils.Kinds.SlotNames, diff --git a/src/Microsoft.ML.Sweeper/Parameters.cs b/src/Microsoft.ML.Sweeper/Parameters.cs index dd46374732..6f78bcf521 100644 --- a/src/Microsoft.ML.Sweeper/Parameters.cs +++ 
b/src/Microsoft.ML.Sweeper/Parameters.cs @@ -588,7 +588,7 @@ public bool TryParseParameter(string paramValue, Type paramType, string paramNam } if (option.StartsWith("steps")) { - numSteps = Int32.Parse(option.Substring(option.IndexOf(':') + 1)); + numSteps = int.Parse(option.Substring(option.IndexOf(':') + 1)); optionsSpecified[1] = true; } if (option.StartsWith("inc")) @@ -613,9 +613,9 @@ public bool TryParseParameter(string paramValue, Type paramType, string paramNam if (paramType == typeof(UInt16) || paramType == typeof(UInt32) || paramType == typeof(UInt64) - || paramType == typeof(Int16) - || paramType == typeof(Int32) - || paramType == typeof(Int64)) + || paramType == typeof(short) + || paramType == typeof(int) + || paramType == typeof(long)) { long min; long max; diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index 617752b2c0..67a6bbd951 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -413,7 +413,7 @@ private void SetMetadata() for (int iinfo = 0; iinfo < Infos.Length; iinfo++) { using (var bldr = md.BuildMetadata(iinfo, Source.Schema, Infos[iinfo].Source, MetadataUtils.Kinds.SlotNames)) - bldr.AddPrimitive(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, DvBool.True); + bldr.AddPrimitive(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, true); } md.Seal(); } diff --git a/src/Microsoft.ML.Transforms/GroupTransform.cs b/src/Microsoft.ML.Transforms/GroupTransform.cs index 8e07b11e06..c3a0bf8736 100644 --- a/src/Microsoft.ML.Transforms/GroupTransform.cs +++ b/src/Microsoft.ML.Transforms/GroupTransform.cs @@ -426,7 +426,6 @@ private sealed class GroupKeyColumnChecker public readonly Func IsSameKey; private static Func MakeSameChecker(IRow row, int col) - where T : IEquatable { T oldValue = default(T); T newValue = default(T); @@ -436,7 +435,16 @@ private static Func MakeSameChecker(IRow row, int col) () => { getter(ref newValue); - bool 
result = first || oldValue.Equals(newValue); + bool result; + + if ((typeof(IEquatable).IsAssignableFrom(typeof(T)))) + result = oldValue.Equals(newValue); + else if ((typeof(ReadOnlyMemory).IsAssignableFrom(typeof(T)))) + result = ((ReadOnlyMemory)(object)oldValue).Span.SequenceEqual(((ReadOnlyMemory)(object)newValue).Span); + else + Contracts.Check(result = false, "Invalid type."); + + result = result || first; oldValue = newValue; first = false; return result; diff --git a/src/Microsoft.ML.Transforms/HashJoinTransform.cs b/src/Microsoft.ML.Transforms/HashJoinTransform.cs index 8120ffc078..d8e58c84ed 100644 --- a/src/Microsoft.ML.Transforms/HashJoinTransform.cs +++ b/src/Microsoft.ML.Transforms/HashJoinTransform.cs @@ -343,11 +343,11 @@ private ColumnInfoEx CreateColumnInfoEx(bool join, string customSlotMap, int has private int[][] CompileSlotMap(string slotMapString, int srcSlotCount) { - var parts = new DvText(slotMapString).Split(new[] { ';' }).ToArray(); + var parts = ReadOnlyMemoryUtils.Split(slotMapString.AsMemory(), new[] { ';' }).ToArray(); var slotMap = new int[parts.Length][]; for (int i = 0; i < slotMap.Length; i++) { - var slotIndices = parts[i].Split(new[] { ',' }).ToArray(); + var slotIndices = ReadOnlyMemoryUtils.Split(parts[i], new[] { ',' }).ToArray(); var slots = new int[slotIndices.Length]; slotMap[i] = slots; for (int j = 0; j < slots.Length; j++) @@ -397,14 +397,14 @@ private void SetMetadata() continue; using (var bldr = md.BuildMetadata(i)) { - bldr.AddGetter>(MetadataUtils.Kinds.SlotNames, + bldr.AddGetter>>(MetadataUtils.Kinds.SlotNames, new VectorType(TextType.Instance, ex.SlotMap.Length), GetSlotNames); } } md.Seal(); } - private void GetSlotNames(int iinfo, ref VBuffer dst) + private void GetSlotNames(int iinfo, ref VBuffer> dst) { Host.Assert(0 <= iinfo && iinfo < Infos.Length); @@ -413,11 +413,11 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) int n = _exes[iinfo].OutputValueCount; var output = dst.Values; if 
(Utils.Size(output) < n) - output = new DvText[n]; + output = new ReadOnlyMemory[n]; var srcColumnName = Source.Schema.GetColumnName(Infos[iinfo].Source); bool useDefaultSlotNames = !Source.Schema.HasSlotNames(Infos[iinfo].Source, Infos[iinfo].TypeSrc.VectorSize); - VBuffer srcSlotNames = default(VBuffer); + VBuffer> srcSlotNames = default; if (!useDefaultSlotNames) { Source.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, Infos[iinfo].Source, ref srcSlotNames); @@ -444,10 +444,10 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) outputSlotName.Append(srcSlotNames.Values[inputSlotIndex]); } - output[slot] = new DvText(outputSlotName.ToString()); + output[slot] = outputSlotName.ToString().AsMemory(); } - dst = new VBuffer(n, output, dst.Indices); + dst = new VBuffer>(n, output, dst.Indices); } private delegate uint HashDelegate(ref TSrc value, uint seed); diff --git a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs index 955940b916..f1503c336a 100644 --- a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs +++ b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs @@ -247,46 +247,46 @@ private void AddMetadata(int i, ColumnMetadataInfo colMetaInfo) { if (typeNames != null) { - MetadataUtils.MetadataGetter> getter = (int col, ref VBuffer dst) => + MetadataUtils.MetadataGetter>> getter = (int col, ref VBuffer> dst) => { GenerateBitSlotName(i, ref dst); }; - var info = new MetadataInfo>(new VectorType(TextType.Instance, _types[i]), getter); + var info = new MetadataInfo>>(new VectorType(TextType.Instance, _types[i]), getter); colMetaInfo.Add(MetadataUtils.Kinds.SlotNames, info); } - MetadataUtils.MetadataGetter normalizeGetter = (int col, ref DvBool dst) => + MetadataUtils.MetadataGetter normalizeGetter = (int col, ref bool dst) => { dst = true; }; - var normalizeInfo = new MetadataInfo(BoolType.Instance, normalizeGetter); + var normalizeInfo = new MetadataInfo(BoolType.Instance, 
normalizeGetter); colMetaInfo.Add(MetadataUtils.Kinds.IsNormalized, normalizeInfo); } else { if (typeNames != null && _types[i].IsKnownSizeVector) { - MetadataUtils.MetadataGetter> getter = (int col, ref VBuffer dst) => + MetadataUtils.MetadataGetter>> getter = (int col, ref VBuffer> dst) => { GetSlotNames(i, ref dst); }; - var info = new MetadataInfo>(new VectorType(TextType.Instance, _types[i]), getter); + var info = new MetadataInfo>>(new VectorType(TextType.Instance, _types[i]), getter); colMetaInfo.Add(MetadataUtils.Kinds.SlotNames, info); } } } - private void GenerateBitSlotName(int iinfo, ref VBuffer dst) + private void GenerateBitSlotName(int iinfo, ref VBuffer> dst) { const string slotNamePrefix = "Bit"; - var bldr = new BufferBuilder(TextCombiner.Instance); + var bldr = new BufferBuilder>(TextCombiner.Instance); bldr.Reset(_bitsPerKey[iinfo], true); for (int i = 0; i < _bitsPerKey[iinfo]; i++) - bldr.AddFeature(i, new DvText(slotNamePrefix + (_bitsPerKey[iinfo] - i - 1))); + bldr.AddFeature(i, (slotNamePrefix + (_bitsPerKey[iinfo] - i - 1)).AsMemory()); bldr.GetResult(ref dst); } - private void GetSlotNames(int iinfo, ref VBuffer dst) + private void GetSlotNames(int iinfo, ref VBuffer> dst) { Host.Assert(0 <= iinfo && iinfo < _infos.Length); Host.Assert(_types[iinfo].IsKnownSizeVector); @@ -296,7 +296,7 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) Host.Assert(typeSrc.VectorSize > 1); // Get the source slot names, defaulting to empty text. 
- var namesSlotSrc = default(VBuffer); + var namesSlotSrc = default(VBuffer>); InputSchema.TryGetColumnIndex(_infos[iinfo].Source, out int srcCol); Host.Assert(srcCol >= 0); var typeSlotSrc = InputSchema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, srcCol); @@ -306,25 +306,25 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) Host.Check(namesSlotSrc.Length == typeSrc.VectorSize); } else - namesSlotSrc = VBufferUtils.CreateEmpty(typeSrc.VectorSize); + namesSlotSrc = VBufferUtils.CreateEmpty>(typeSrc.VectorSize); int slotLim = _types[iinfo].VectorSize; Host.Assert(slotLim == (long)typeSrc.VectorSize * _bitsPerKey[iinfo]); var values = dst.Values; if (Utils.Size(values) < slotLim) - values = new DvText[slotLim]; + values = new ReadOnlyMemory[slotLim]; var sb = new StringBuilder(); int slot = 0; - VBuffer bits = default; + VBuffer> bits = default; GenerateBitSlotName(iinfo, ref bits); foreach (var kvpSlot in namesSlotSrc.Items(all: true)) { Contracts.Assert(slot == (long)kvpSlot.Key * _bitsPerKey[iinfo]); sb.Clear(); - if (kvpSlot.Value.HasChars) - kvpSlot.Value.AddToStringBuilder(sb); + if (!kvpSlot.Value.IsEmpty) + sb.AppendMemory(kvpSlot.Value); else sb.Append('[').Append(kvpSlot.Key).Append(']'); sb.Append('.'); @@ -333,13 +333,13 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) foreach (var key in bits.Values) { sb.Length = len; - key.AddToStringBuilder(sb); - values[slot++] = new DvText(sb.ToString()); + sb.AppendMemory(key); + values[slot++] = sb.ToString().AsMemory(); } } Host.Assert(slot == slotLim); - dst = new VBuffer(slotLim, values, dst.Indices); + dst = new VBuffer>(slotLim, values, dst.Indices); } protected override Delegate MakeGetter(IRow input, int iinfo, out Action disposer) diff --git a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs index 7020bc0830..0a8d247766 100644 --- a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs +++ 
b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs @@ -159,7 +159,7 @@ private VectorType[] GetTypesAndMetadata() // Add slot names metadata. using (var bldr = md.BuildMetadata(iinfo)) { - bldr.AddGetter>(MetadataUtils.Kinds.SlotNames, + bldr.AddGetter>>(MetadataUtils.Kinds.SlotNames, MetadataUtils.GetNamesType(types[iinfo].VectorSize), GetSlotNames); } } @@ -173,7 +173,7 @@ protected override ColumnType GetColumnTypeCore(int iinfo) return _types[iinfo]; } - private void GetSlotNames(int iinfo, ref VBuffer dst) + private void GetSlotNames(int iinfo, ref VBuffer> dst) { Host.Assert(0 <= iinfo && iinfo < Infos.Length); @@ -183,15 +183,15 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) var values = dst.Values; if (Utils.Size(values) < size) - values = new DvText[size]; + values = new ReadOnlyMemory[size]; var type = Infos[iinfo].TypeSrc; if (!type.IsVector) { Host.Assert(_types[iinfo].VectorSize == 2); var columnName = Source.Schema.GetColumnName(Infos[iinfo].Source); - values[0] = new DvText(columnName); - values[1] = new DvText(columnName + IndicatorSuffix); + values[0] = columnName.AsMemory(); + values[1] = (columnName + IndicatorSuffix).AsMemory(); } else { @@ -203,7 +203,7 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) if (typeNames == null || typeNames.VectorSize != type.VectorSize || !typeNames.ItemType.IsText) throw MetadataUtils.ExceptGetMetadata(); - var names = default(VBuffer); + var names = default(VBuffer>); Source.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, Infos[iinfo].Source, ref names); // We both assert and check. 
If this fails, there is a bug somewhere (possibly in this code @@ -219,22 +219,22 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) Host.Assert(slot % 2 == 0); sb.Clear(); - if (!kvp.Value.HasChars) + if (kvp.Value.IsEmpty) sb.Append('[').Append(slot / 2).Append(']'); else - kvp.Value.AddToStringBuilder(sb); + sb.AppendMemory(kvp.Value); int len = sb.Length; sb.Append(IndicatorSuffix); var str = sb.ToString(); - values[slot++] = new DvText(str, 0, len); - values[slot++] = new DvText(str); + values[slot++] = str.AsMemory().Slice(0, len); + values[slot++] = str.AsMemory(); } Host.Assert(slot == size); } - dst = new VBuffer(size, values, dst.Indices); + dst = new VBuffer>(size, values, dst.Indices); } protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, out Action disposer) diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs index 0af833a046..28bfc8d511 100644 --- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs @@ -300,7 +300,7 @@ private sealed class Impl private int[] _featureSums; private readonly List _singles; private readonly List _doubles; - private ValueMapper, VBuffer> _boolMapper; + private ValueMapper, VBuffer> _boolMapper; public Impl(IHost host) { @@ -407,7 +407,7 @@ private void GetLabels(Transposer trans, ColumnType labelType, int labelCol) // Note: NAs have their own separate bin. 
if (labelType == NumberType.I4) { - var tmp = default(VBuffer); + var tmp = default(VBuffer); trans.GetSingleSlotValue(labelCol, ref tmp); BinInts(ref tmp, ref labels, _numBins, out min, out lim); _numLabels = lim - min; @@ -428,7 +428,7 @@ private void GetLabels(Transposer trans, ColumnType labelType, int labelCol) } else if (labelType.IsBool) { - var tmp = default(VBuffer); + var tmp = default(VBuffer); trans.GetSingleSlotValue(labelCol, ref tmp); BinBools(ref tmp, ref labels); _numLabels = 3; @@ -486,7 +486,7 @@ private Single[] ComputeMutualInformation(Transposer trans, int col) if (type.ItemType == NumberType.I4) { return ComputeMutualInformation(trans, col, - (ref VBuffer src, ref VBuffer dst, out int min, out int lim) => + (ref VBuffer src, ref VBuffer dst, out int min, out int lim) => { BinInts(ref src, ref dst, _numBins, out min, out lim); }); @@ -510,7 +510,7 @@ private Single[] ComputeMutualInformation(Transposer trans, int col) if (type.ItemType.IsBool) { return ComputeMutualInformation(trans, col, - (ref VBuffer src, ref VBuffer dst, out int min, out int lim) => + (ref VBuffer src, ref VBuffer dst, out int min, out int lim) => { min = -1; lim = 2; @@ -674,29 +674,20 @@ private static ValueMapper, VBuffer> BinKeys(ColumnType colTy } /// - /// Maps from DvInt4 to ints. NaNs (and only NaNs) are mapped to the first bin. + /// Maps Ints. /// - private void BinInts(ref VBuffer input, ref VBuffer output, + private void BinInts(ref VBuffer input, ref VBuffer output, int numBins, out int min, out int lim) { Contracts.Assert(_singles.Count == 0); - if (input.Values != null) - { - for (int i = 0; i < input.Count; i++) - { - var val = input.Values[i]; - if (!val.IsNA) - _singles.Add((Single)val); - } - } var bounds = _binFinder.FindBins(numBins, _singles, input.Length - input.Count); min = -1 - bounds.FindIndexSorted(0); lim = min + bounds.Length + 1; int offset = min; - ValueMapper mapper = - (ref DvInt4 src, ref int dst) => - dst = src.IsNA ? 
offset : offset + 1 + bounds.FindIndexSorted((Single)src); + ValueMapper mapper = + (ref int src, ref int dst) => + dst = offset + 1 + bounds.FindIndexSorted((Single)src); mapper.MapVector(ref input, ref output); _singles.Clear(); } @@ -756,16 +747,16 @@ private void BinDoubles(ref VBuffer input, ref VBuffer output, _doubles.Clear(); } - private void BinBools(ref VBuffer input, ref VBuffer output) + private void BinBools(ref VBuffer input, ref VBuffer output) { if (_boolMapper == null) - _boolMapper = CreateVectorMapper(BinOneBool); + _boolMapper = CreateVectorMapper(BinOneBool); _boolMapper(ref input, ref output); } - private void BinOneBool(ref DvBool src, ref int dst) + private void BinOneBool(ref bool src, ref int dst) { - dst = src.IsNA ? -1 : src.IsFalse ? 0 : 1; + dst = Convert.ToInt32(src); } } diff --git a/src/Microsoft.ML.Transforms/NADropTransform.cs b/src/Microsoft.ML.Transforms/NADropTransform.cs index 80e88f3ae3..ea47fb9d1a 100644 --- a/src/Microsoft.ML.Transforms/NADropTransform.cs +++ b/src/Microsoft.ML.Transforms/NADropTransform.cs @@ -100,7 +100,7 @@ private Delegate[] InitIsNAAndMetadata() MetadataUtils.Kinds.IsNormalized, MetadataUtils.Kinds.KeyValues)) { // Output does not have missings. - bldr.AddPrimitive(MetadataUtils.Kinds.HasMissingValues, BoolType.Instance, DvBool.False); + bldr.AddPrimitive(MetadataUtils.Kinds.HasMissingValues, BoolType.Instance, false); } } md.Seal(); diff --git a/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs b/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs index 7607e19c61..39b3d650d2 100644 --- a/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs +++ b/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs @@ -156,7 +156,7 @@ private ColumnType[] GetTypesAndMetadata() using (var bldr = md.BuildMetadata(iinfo, Source.Schema, Infos[iinfo].Source, MetadataUtils.Kinds.SlotNames)) { // Output is normalized. 
- bldr.AddPrimitive(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, DvBool.True); + bldr.AddPrimitive(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, true); } } md.Seal(); @@ -184,41 +184,41 @@ protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, ou /// /// Getter generator for single valued inputs. /// - private ValueGetter ComposeGetterOne(IRow input, int iinfo) + private ValueGetter ComposeGetterOne(IRow input, int iinfo) { - Func> func = ComposeGetterOne; + Func> func = ComposeGetterOne; return Utils.MarshalInvoke(func, Infos[iinfo].TypeSrc.RawType, input, iinfo); } /// /// Tests if a value is NA for scalars. /// - private ValueGetter ComposeGetterOne(IRow input, int iinfo) + private ValueGetter ComposeGetterOne(IRow input, int iinfo) { var getSrc = GetSrcGetter(input, iinfo); var isNA = Conversions.Instance.GetIsNAPredicate(input.Schema.GetColumnType(Infos[iinfo].Source)); T src = default(T); return - (ref DvBool dst) => + (ref bool dst) => { getSrc(ref src); - dst = isNA(ref src) ? DvBool.True : DvBool.False; + dst = isNA(ref src); }; } /// /// Getter generator for vector valued inputs. /// - private ValueGetter> ComposeGetterVec(IRow input, int iinfo) + private ValueGetter> ComposeGetterVec(IRow input, int iinfo) { - Func>> func = ComposeGetterVec; + Func>> func = ComposeGetterVec; return Utils.MarshalInvoke(func, Infos[iinfo].TypeSrc.ItemType.RawType, input, iinfo); } /// /// Tests if a value is NA for vectors. 
/// - private ValueGetter> ComposeGetterVec(IRow input, int iinfo) + private ValueGetter> ComposeGetterVec(IRow input, int iinfo) { var getSrc = GetSrcGetter>(input, iinfo); var isNA = Conversions.Instance.GetIsNAPredicate(input.Schema.GetColumnType(Infos[iinfo].Source).ItemType); @@ -227,7 +227,7 @@ private ValueGetter> ComposeGetterVec(IRow input, int iinfo) var src = default(VBuffer); var indices = new List(); return - (ref VBuffer dst) => + (ref VBuffer dst) => { // Sense indicates if the values added to the indices list represent NAs or non-NAs. bool sense; @@ -285,7 +285,7 @@ private void FindNAs(ref VBuffer src, RefPredicate isNA, bool defaultIs /// Fills indicator values for vectors. The indices is a list that either holds all of the NAs or all /// of the non-NAs, indicated by sense being true or false respectively. /// - private void FillValues(int srcLength, ref VBuffer dst, List indices, bool sense) + private void FillValues(int srcLength, ref VBuffer dst, List indices, bool sense) { var dstValues = dst.Values; var dstIndices = dst.Indices; @@ -295,15 +295,15 @@ private void FillValues(int srcLength, ref VBuffer dst, List indice if (sense) { // Return empty VBuffer. - dst = new VBuffer(srcLength, 0, dstValues, dstIndices); + dst = new VBuffer(srcLength, 0, dstValues, dstIndices); return; } // Return VBuffer filled with 1's. 
Utils.EnsureSize(ref dstValues, srcLength, false); for (int i = 0; i < srcLength; i++) - dstValues[i] = DvBool.True; - dst = new VBuffer(srcLength, dstValues, dstIndices); + dstValues[i] = true; + dst = new VBuffer(srcLength, dstValues, dstIndices); return; } @@ -316,10 +316,10 @@ private void FillValues(int srcLength, ref VBuffer dst, List indice indices.CopyTo(dstIndices); for (int ii = 0; ii < dstCount; ii++) - dstValues[ii] = DvBool.True; + dstValues[ii] = true; Host.Assert(dstCount <= srcLength); - dst = new VBuffer(srcLength, dstCount, dstValues, dstIndices); + dst = new VBuffer(srcLength, dstCount, dstValues, dstIndices); } else if (!sense && srcLength - indices.Count < srcLength / 2) { @@ -342,7 +342,7 @@ private void FillValues(int srcLength, ref VBuffer dst, List indice if (i < iNext) { Host.Assert(iiDst < dstCount); - dstValues[iiDst] = DvBool.True; + dstValues[iiDst] = true; dstIndices[iiDst++] = i; } else @@ -355,7 +355,7 @@ private void FillValues(int srcLength, ref VBuffer dst, List indice Host.Assert(srcLength == iiSrc + iiDst); Host.Assert(iiDst == dstCount); - dst = new VBuffer(srcLength, dstCount, dstValues, dstIndices); + dst = new VBuffer(srcLength, dstCount, dstValues, dstIndices); } else { @@ -367,24 +367,21 @@ private void FillValues(int srcLength, ref VBuffer dst, List indice indices.Add(srcLength); int ii = 0; - // Assigns values correctly depending on the sense. - DvBool hit = sense ? DvBool.True : DvBool.False; - DvBool miss = sense ? 
DvBool.False : DvBool.True; for (int i = 0; i < srcLength; i++) { Host.Assert(0 <= i && i <= indices[ii]); if (i == indices[ii]) { - dstValues[i] = hit; + dstValues[i] = sense; ii++; Host.Assert(ii < indices.Count); Host.Assert(indices[ii - 1] < indices[ii]); } else - dstValues[i] = miss; + dstValues[i] = !sense; } - dst = new VBuffer(srcLength, dstValues, dstIndices); + dst = new VBuffer(srcLength, dstValues, dstIndices); } } } diff --git a/src/Microsoft.ML.Transforms/NAReplaceTransform.cs b/src/Microsoft.ML.Transforms/NAReplaceTransform.cs index d8cd698658..1f3b4ee220 100644 --- a/src/Microsoft.ML.Transforms/NAReplaceTransform.cs +++ b/src/Microsoft.ML.Transforms/NAReplaceTransform.cs @@ -452,8 +452,8 @@ private object GetSpecifiedValue(string srcStr, ColumnType dstType, RefPredic if (!string.IsNullOrEmpty(srcStr)) { // Handles converting input strings to correct types. - DvText srcTxt = new DvText(srcStr); - var strToT = Conversions.Instance.GetStandardConversion(TextType.Instance, dstType.ItemType, out bool identity); + var srcTxt = srcStr.AsMemory(); + var strToT = Conversions.Instance.GetStandardConversion, T>(TextType.Instance, dstType.ItemType, out bool identity); strToT(ref srcTxt, ref val); // Make sure that the srcTxt can legitimately be converted to dstType, throw error otherwise. if (isNA(ref val)) diff --git a/src/Microsoft.ML.Transforms/NAReplaceUtils.cs b/src/Microsoft.ML.Transforms/NAReplaceUtils.cs index 8100a5f84b..cf4c32cc0e 100644 --- a/src/Microsoft.ML.Transforms/NAReplaceUtils.cs +++ b/src/Microsoft.ML.Transforms/NAReplaceUtils.cs @@ -14,7 +14,7 @@ public sealed partial class NAReplaceTransform { private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, ReplacementKind? kind, bool bySlot, IRowCursor cursor, int col) { - ch.Assert(type.ItemType.IsNumber || type.ItemType.IsTimeSpan || type.ItemType.IsDateTime); + ch.Assert(type.ItemType.IsNumber); if (!type.IsVector) { // The type is a scalar. 
@@ -22,22 +22,10 @@ private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, { switch (type.RawKind) { - case DataKind.I1: - return new I1.MeanAggregatorOne(ch, cursor, col); - case DataKind.I2: - return new I2.MeanAggregatorOne(ch, cursor, col); - case DataKind.I4: - return new I4.MeanAggregatorOne(ch, cursor, col); - case DataKind.I8: - return new Long.MeanAggregatorOne(ch, type, cursor, col); case DataKind.R4: return new R4.MeanAggregatorOne(ch, cursor, col); case DataKind.R8: return new R8.MeanAggregatorOne(ch, cursor, col); - case DataKind.TS: - return new Long.MeanAggregatorOne(ch, type, cursor, col); - case DataKind.DT: - return new Long.MeanAggregatorOne(ch, type, cursor, col); default: break; } @@ -46,22 +34,10 @@ private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, { switch (type.RawKind) { - case DataKind.I1: - return new I1.MinMaxAggregatorOne(ch, cursor, col, kind == ReplacementKind.Max); - case DataKind.I2: - return new I2.MinMaxAggregatorOne(ch, cursor, col, kind == ReplacementKind.Max); - case DataKind.I4: - return new I4.MinMaxAggregatorOne(ch, cursor, col, kind == ReplacementKind.Max); - case DataKind.I8: - return new Long.MinMaxAggregatorOne(ch, type, cursor, col, kind == ReplacementKind.Max); case DataKind.R4: return new R4.MinMaxAggregatorOne(ch, cursor, col, kind == ReplacementKind.Max); case DataKind.R8: return new R8.MinMaxAggregatorOne(ch, cursor, col, kind == ReplacementKind.Max); - case DataKind.TS: - return new Long.MinMaxAggregatorOne(ch, type, cursor, col, kind == ReplacementKind.Max); - case DataKind.DT: - return new Long.MinMaxAggregatorOne(ch, type, cursor, col, kind == ReplacementKind.Max); default: break; } @@ -78,22 +54,10 @@ private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, { switch (type.ItemType.RawKind) { - case DataKind.I1: - return new I1.MeanAggregatorBySlot(ch, type, cursor, col); - case DataKind.I2: - return new I2.MeanAggregatorBySlot(ch, 
type, cursor, col); - case DataKind.I4: - return new I4.MeanAggregatorBySlot(ch, type, cursor, col); - case DataKind.I8: - return new Long.MeanAggregatorBySlot(ch, type, cursor, col); case DataKind.R4: return new R4.MeanAggregatorBySlot(ch, type, cursor, col); case DataKind.R8: return new R8.MeanAggregatorBySlot(ch, type, cursor, col); - case DataKind.TS: - return new Long.MeanAggregatorBySlot(ch, type, cursor, col); - case DataKind.DT: - return new Long.MeanAggregatorBySlot(ch, type, cursor, col); default: break; } @@ -102,22 +66,10 @@ private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, { switch (type.ItemType.RawKind) { - case DataKind.I1: - return new I1.MinMaxAggregatorBySlot(ch, type, cursor, col, kind == ReplacementKind.Max); - case DataKind.I2: - return new I2.MinMaxAggregatorBySlot(ch, type, cursor, col, kind == ReplacementKind.Max); - case DataKind.I4: - return new I4.MinMaxAggregatorBySlot(ch, type, cursor, col, kind == ReplacementKind.Max); - case DataKind.I8: - return new Long.MinMaxAggregatorBySlot(ch, type, cursor, col, kind == ReplacementKind.Max); case DataKind.R4: return new R4.MinMaxAggregatorBySlot(ch, type, cursor, col, kind == ReplacementKind.Max); case DataKind.R8: return new R8.MinMaxAggregatorBySlot(ch, type, cursor, col, kind == ReplacementKind.Max); - case DataKind.TS: - return new Long.MinMaxAggregatorBySlot(ch, type, cursor, col, kind == ReplacementKind.Max); - case DataKind.DT: - return new Long.MinMaxAggregatorBySlot(ch, type, cursor, col, kind == ReplacementKind.Max); default: break; } @@ -130,22 +82,10 @@ private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, { switch (type.ItemType.RawKind) { - case DataKind.I1: - return new I1.MeanAggregatorAcrossSlots(ch, cursor, col); - case DataKind.I2: - return new I2.MeanAggregatorAcrossSlots(ch, cursor, col); - case DataKind.I4: - return new I4.MeanAggregatorAcrossSlots(ch, cursor, col); - case DataKind.I8: - return new 
Long.MeanAggregatorAcrossSlots(ch, type, cursor, col); case DataKind.R4: return new R4.MeanAggregatorAcrossSlots(ch, cursor, col); case DataKind.R8: return new R8.MeanAggregatorAcrossSlots(ch, cursor, col); - case DataKind.TS: - return new Long.MeanAggregatorAcrossSlots(ch, type, cursor, col); - case DataKind.DT: - return new Long.MeanAggregatorAcrossSlots(ch, type, cursor, col); default: break; } @@ -154,22 +94,10 @@ private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, { switch (type.ItemType.RawKind) { - case DataKind.I1: - return new I1.MinMaxAggregatorAcrossSlots(ch, cursor, col, kind == ReplacementKind.Max); - case DataKind.I2: - return new I2.MinMaxAggregatorAcrossSlots(ch, cursor, col, kind == ReplacementKind.Max); - case DataKind.I4: - return new I4.MinMaxAggregatorAcrossSlots(ch, cursor, col, kind == ReplacementKind.Max); - case DataKind.I8: - return new Long.MinMaxAggregatorAcrossSlots(ch, type, cursor, col, kind == ReplacementKind.Max); case DataKind.R4: return new R4.MinMaxAggregatorAcrossSlots(ch, cursor, col, kind == ReplacementKind.Max); case DataKind.R8: return new R8.MinMaxAggregatorAcrossSlots(ch, cursor, col, kind == ReplacementKind.Max); - case DataKind.TS: - return new Long.MinMaxAggregatorAcrossSlots(ch, type, cursor, col, kind == ReplacementKind.Max); - case DataKind.DT: - return new Long.MinMaxAggregatorAcrossSlots(ch, type, cursor, col, kind == ReplacementKind.Max); default: break; } @@ -503,17 +431,17 @@ private void AssertValid(long valMax) Contracts.Assert(_cna >= 0); } - public void Update(long val, long valMax) + public void Update(long? 
val, long valMax) { AssertValid(valMax); - Contracts.Assert(-valMax - 1 <= val && val <= valMax); + Contracts.Assert(!val.HasValue || -valMax <= val && val <= valMax); - if (val >= 0) + if (!val.HasValue) + _cna++; + else if (val >= 0) IntUtils.Add(ref _sumHi, ref _sumLo, (ulong)val); - else if (val >= -valMax) - IntUtils.Sub(ref _sumHi, ref _sumLo, (ulong)(-val)); else - _cna++; + IntUtils.Sub(ref _sumHi, ref _sumLo, (ulong)(-val)); AssertValid(valMax); } @@ -928,800 +856,5 @@ public override object GetStat() } } } - - private static class I1 - { - // Utilizes MeanStatInt for the mean aggregators of all IX types, TS, and DT. - - private const long MaxVal = sbyte.MaxValue; - - public sealed class MeanAggregatorOne : StatAggregator - { - public MeanAggregatorOne(IChannel ch, IRowCursor cursor, int col) - : base(ch, cursor, col) - { - } - - protected override void ProcessRow(ref DvInt1 val) - { - Stat.Update(val.RawValue, MaxVal); - } - - public override object GetStat() - { - long val = Stat.GetCurrentValue(Ch, RowCount, MaxVal); - Ch.Assert(-MaxVal - 1 <= val && val <= MaxVal); - return (DvInt1)(sbyte)val; - } - } - - public sealed class MeanAggregatorAcrossSlots : StatAggregatorAcrossSlots - { - public MeanAggregatorAcrossSlots(IChannel ch, IRowCursor cursor, int col) - : base(ch, cursor, col) - { - } - - protected override void ProcessValue(ref DvInt1 val) - { - Stat.Update(val.RawValue, MaxVal); - } - - public override object GetStat() - { - long val = Stat.GetCurrentValue(Ch, ValueCount, MaxVal); - Ch.Assert(-MaxVal - 1 <= val && val <= MaxVal); - return (DvInt1)(sbyte)val; - } - } - - public sealed class MeanAggregatorBySlot : StatAggregatorBySlot - { - public MeanAggregatorBySlot(IChannel ch, ColumnType type, IRowCursor cursor, int col) - : base(ch, type, cursor, col) - { - } - - protected override void ProcessValue(ref DvInt1 val, int slot) - { - Ch.Assert(0 <= slot && slot < Stat.Length); - Stat[slot].Update(val.RawValue, MaxVal); - } - - public override 
object GetStat() - { - DvInt1[] stat = new DvInt1[Stat.Length]; - for (int slot = 0; slot < stat.Length; slot++) - { - long val = Stat[slot].GetCurrentValue(Ch, RowCount, MaxVal); - Ch.Assert(-MaxVal - 1 <= val && val <= MaxVal); - stat[slot] = (DvInt1)(sbyte)val; - } - return stat; - } - } - - public sealed class MinMaxAggregatorOne : MinMaxAggregatorOne - { - public MinMaxAggregatorOne(IChannel ch, IRowCursor cursor, int col, bool returnMax) - : base(ch, cursor, col, returnMax) - { - Stat = (sbyte)(ReturnMax ? -MaxVal : MaxVal); - } - - protected override void ProcessValueMin(ref DvInt1 val) - { - var raw = val.RawValue; - if (raw < Stat && raw != DvInt1.RawNA) - Stat = raw; - } - - protected override void ProcessValueMax(ref DvInt1 val) - { - var raw = val.RawValue; - if (raw > Stat) - Stat = raw; - } - - public override object GetStat() - { - return (DvInt1)Stat; - } - } - - public sealed class MinMaxAggregatorAcrossSlots : MinMaxAggregatorAcrossSlots - { - public MinMaxAggregatorAcrossSlots(IChannel ch, IRowCursor cursor, int col, bool returnMax) - : base(ch, cursor, col, returnMax) - { - Stat = (sbyte)(ReturnMax ? -MaxVal : MaxVal); - } - - protected override void ProcessValueMin(ref DvInt1 val) - { - var raw = val.RawValue; - if (raw < Stat && raw != DvInt1.RawNA) - Stat = raw; - } - - protected override void ProcessValueMax(ref DvInt1 val) - { - var raw = val.RawValue; - if (raw > Stat) - Stat = raw; - } - - public override object GetStat() - { - // If sparsity occurred, fold in a zero. - if (ValueCount > (ulong)ValuesProcessed) - { - var def = default(DvInt1); - ProcValueDelegate(ref def); - } - return (DvInt1)Stat; - } - } - - public sealed class MinMaxAggregatorBySlot : MinMaxAggregatorBySlot - { - public MinMaxAggregatorBySlot(IChannel ch, ColumnType type, IRowCursor cursor, int col, bool returnMax) - : base(ch, type, cursor, col, returnMax) - { - sbyte bound = (sbyte)(ReturnMax ? 
-MaxVal : MaxVal); - for (int i = 0; i < Stat.Length; i++) - Stat[i] = bound; - } - - protected override void ProcessValueMin(ref DvInt1 val, int slot) - { - Ch.Assert(0 <= slot && slot < Stat.Length); - var raw = val.RawValue; - if (raw < Stat[slot] && raw != DvInt1.RawNA) - Stat[slot] = raw; - } - - protected override void ProcessValueMax(ref DvInt1 val, int slot) - { - Ch.Assert(0 <= slot && slot < Stat.Length); - var raw = val.RawValue; - if (raw > Stat[slot]) - Stat[slot] = raw; - } - - public override object GetStat() - { - DvInt1[] stat = new DvInt1[Stat.Length]; - // Account for defaults resulting from sparsity. - for (int slot = 0; slot < Stat.Length; slot++) - { - if (GetValuesProcessed(slot) < RowCount) - { - var def = default(DvInt1); - ProcValueDelegate(ref def, slot); - } - stat[slot] = (DvInt1)Stat[slot]; - } - return stat; - } - } - } - - private static class I2 - { - private const long MaxVal = short.MaxValue; - - public sealed class MeanAggregatorOne : StatAggregator - { - public MeanAggregatorOne(IChannel ch, IRowCursor cursor, int col) - : base(ch, cursor, col) - { - } - - protected override void ProcessRow(ref DvInt2 val) - { - Stat.Update(val.RawValue, MaxVal); - } - - public override object GetStat() - { - long val = Stat.GetCurrentValue(Ch, RowCount, MaxVal); - Ch.Assert(-MaxVal - 1 <= val && val <= MaxVal); - return (DvInt2)(short)val; - } - } - - public sealed class MeanAggregatorAcrossSlots : StatAggregatorAcrossSlots - { - public MeanAggregatorAcrossSlots(IChannel ch, IRowCursor cursor, int col) - : base(ch, cursor, col) - { - } - - protected override void ProcessValue(ref DvInt2 val) - { - Stat.Update(val.RawValue, MaxVal); - } - - public override object GetStat() - { - long val = Stat.GetCurrentValue(Ch, ValueCount, MaxVal); - Ch.Assert(-MaxVal - 1 <= val && val <= MaxVal); - return (DvInt2)(short)val; - } - } - - public sealed class MeanAggregatorBySlot : StatAggregatorBySlot - { - public MeanAggregatorBySlot(IChannel ch, ColumnType 
type, IRowCursor cursor, int col) - : base(ch, type, cursor, col) - { - } - - protected override void ProcessValue(ref DvInt2 val, int slot) - { - Ch.Assert(0 <= slot && slot < Stat.Length); - Stat[slot].Update(val.RawValue, MaxVal); - } - - public override object GetStat() - { - DvInt2[] stat = new DvInt2[Stat.Length]; - for (int slot = 0; slot < stat.Length; slot++) - { - long val = Stat[slot].GetCurrentValue(Ch, RowCount, MaxVal); - Ch.Assert(-MaxVal - 1 <= val && val <= MaxVal); - stat[slot] = (DvInt2)(short)val; - } - return stat; - } - } - - public sealed class MinMaxAggregatorOne : MinMaxAggregatorOne - { - public MinMaxAggregatorOne(IChannel ch, IRowCursor cursor, int col, bool returnMax) - : base(ch, cursor, col, returnMax) - { - Stat = (short)(ReturnMax ? -MaxVal : MaxVal); - } - - protected override void ProcessValueMin(ref DvInt2 val) - { - var raw = val.RawValue; - if (raw < Stat && raw != DvInt2.RawNA) - Stat = raw; - } - - protected override void ProcessValueMax(ref DvInt2 val) - { - var raw = val.RawValue; - if (raw > Stat) - Stat = raw; - } - - public override object GetStat() - { - return (DvInt2)Stat; - } - } - - public sealed class MinMaxAggregatorAcrossSlots : MinMaxAggregatorAcrossSlots - { - public MinMaxAggregatorAcrossSlots(IChannel ch, IRowCursor cursor, int col, bool returnMax) - : base(ch, cursor, col, returnMax) - { - Stat = (short)(ReturnMax ? -MaxVal : MaxVal); - } - - protected override void ProcessValueMin(ref DvInt2 val) - { - var raw = val.RawValue; - if (raw < Stat && raw != DvInt2.RawNA) - Stat = raw; - } - - protected override void ProcessValueMax(ref DvInt2 val) - { - var raw = val.RawValue; - if (raw > Stat) - Stat = raw; - } - - public override object GetStat() - { - // If sparsity occurred, fold in a zero. 
- if (ValueCount > (ulong)ValuesProcessed) - { - var def = default(DvInt2); - ProcValueDelegate(ref def); - } - return (DvInt2)Stat; - } - } - - public sealed class MinMaxAggregatorBySlot : MinMaxAggregatorBySlot - { - public MinMaxAggregatorBySlot(IChannel ch, ColumnType type, IRowCursor cursor, int col, bool returnMax) - : base(ch, type, cursor, col, returnMax) - { - short bound = (short)(ReturnMax ? -MaxVal : MaxVal); - for (int i = 0; i < Stat.Length; i++) - Stat[i] = bound; - } - - protected override void ProcessValueMin(ref DvInt2 val, int slot) - { - Ch.Assert(0 <= slot && slot < Stat.Length); - var raw = val.RawValue; - if (raw < Stat[slot] && raw != DvInt2.RawNA) - Stat[slot] = raw; - } - - protected override void ProcessValueMax(ref DvInt2 val, int slot) - { - Ch.Assert(0 <= slot && slot < Stat.Length); - var raw = val.RawValue; - if (raw > Stat[slot]) - Stat[slot] = raw; - } - - public override object GetStat() - { - DvInt2[] stat = new DvInt2[Stat.Length]; - // Account for defaults resulting from sparsity. 
- for (int slot = 0; slot < Stat.Length; slot++) - { - if (GetValuesProcessed(slot) < RowCount) - { - var def = default(DvInt2); - ProcValueDelegate(ref def, slot); - } - stat[slot] = (DvInt2)Stat[slot]; - } - return stat; - } - } - } - - private static class I4 - { - private const long MaxVal = int.MaxValue; - - public sealed class MeanAggregatorOne : StatAggregator - { - public MeanAggregatorOne(IChannel ch, IRowCursor cursor, int col) - : base(ch, cursor, col) - { - } - - protected override void ProcessRow(ref DvInt4 val) - { - Stat.Update(val.RawValue, MaxVal); - } - - public override object GetStat() - { - long val = Stat.GetCurrentValue(Ch, RowCount, MaxVal); - Ch.Assert(-MaxVal - 1 <= val && val <= MaxVal); - return (DvInt4)(int)val; - } - } - - public sealed class MeanAggregatorAcrossSlots : StatAggregatorAcrossSlots - { - public MeanAggregatorAcrossSlots(IChannel ch, IRowCursor cursor, int col) - : base(ch, cursor, col) - { - } - - protected override void ProcessValue(ref DvInt4 val) - { - Stat.Update(val.RawValue, MaxVal); - } - - public override object GetStat() - { - long val = Stat.GetCurrentValue(Ch, ValueCount, MaxVal); - Ch.Assert(-MaxVal - 1 <= val && val <= MaxVal); - return (DvInt4)(int)val; - } - } - - public sealed class MeanAggregatorBySlot : StatAggregatorBySlot - { - public MeanAggregatorBySlot(IChannel ch, ColumnType type, IRowCursor cursor, int col) - : base(ch, type, cursor, col) - { - } - - protected override void ProcessValue(ref DvInt4 val, int slot) - { - Ch.Assert(0 <= slot && slot < Stat.Length); - Stat[slot].Update(val.RawValue, MaxVal); - } - - public override object GetStat() - { - DvInt4[] stat = new DvInt4[Stat.Length]; - for (int slot = 0; slot < stat.Length; slot++) - { - long val = Stat[slot].GetCurrentValue(Ch, RowCount, MaxVal); - Ch.Assert(-MaxVal - 1 <= val && val <= MaxVal); - stat[slot] = (DvInt4)(int)val; - } - return stat; - } - } - - public sealed class MinMaxAggregatorOne : MinMaxAggregatorOne - { - public 
MinMaxAggregatorOne(IChannel ch, IRowCursor cursor, int col, bool returnMax) - : base(ch, cursor, col, returnMax) - { - Stat = (int)(ReturnMax ? -MaxVal : MaxVal); - } - - protected override void ProcessValueMin(ref DvInt4 val) - { - var raw = val.RawValue; - if (raw < Stat && raw != DvInt4.RawNA) - Stat = raw; - } - - protected override void ProcessValueMax(ref DvInt4 val) - { - var raw = val.RawValue; - if (raw > Stat) - Stat = raw; - } - - public override object GetStat() - { - return (DvInt4)Stat; - } - } - - public sealed class MinMaxAggregatorAcrossSlots : MinMaxAggregatorAcrossSlots - { - public MinMaxAggregatorAcrossSlots(IChannel ch, IRowCursor cursor, int col, bool returnMax) - : base(ch, cursor, col, returnMax) - { - Stat = (int)(ReturnMax ? -MaxVal : MaxVal); - } - - protected override void ProcessValueMin(ref DvInt4 val) - { - var raw = val.RawValue; - if (raw < Stat && raw != DvInt4.RawNA) - Stat = raw; - } - - protected override void ProcessValueMax(ref DvInt4 val) - { - var raw = val.RawValue; - if (raw > Stat) - Stat = raw; - } - - public override object GetStat() - { - // If sparsity occurred, fold in a zero. - if (ValueCount > (ulong)ValuesProcessed) - { - var def = default(DvInt4); - ProcValueDelegate(ref def); - } - return (DvInt4)Stat; - } - } - - public sealed class MinMaxAggregatorBySlot : MinMaxAggregatorBySlot - { - public MinMaxAggregatorBySlot(IChannel ch, ColumnType type, IRowCursor cursor, int col, bool returnMax) - : base(ch, type, cursor, col, returnMax) - { - int bound = (int)(ReturnMax ? 
-MaxVal : MaxVal); - for (int i = 0; i < Stat.Length; i++) - Stat[i] = bound; - } - - protected override void ProcessValueMin(ref DvInt4 val, int slot) - { - Ch.Assert(0 <= slot && slot < Stat.Length); - var raw = val.RawValue; - if (raw < Stat[slot] && raw != DvInt4.RawNA) - Stat[slot] = raw; - } - - protected override void ProcessValueMax(ref DvInt4 val, int slot) - { - Ch.Assert(0 <= slot && slot < Stat.Length); - var raw = val.RawValue; - if (raw > Stat[slot]) - Stat[slot] = raw; - } - - public override object GetStat() - { - DvInt4[] stat = new DvInt4[Stat.Length]; - // Account for defaults resulting from sparsity. - for (int slot = 0; slot < Stat.Length; slot++) - { - if (GetValuesProcessed(slot) < RowCount) - { - var def = default(DvInt4); - ProcValueDelegate(ref def, slot); - } - stat[slot] = (DvInt4)Stat[slot]; - } - return stat; - } - } - } - - private static class Long - { - private const long MaxVal = long.MaxValue; - - public sealed class MeanAggregatorOne : StatAggregator - { - // Converts between TItem and long. 
- private Converter _converter; - - public MeanAggregatorOne(IChannel ch, ColumnType type, IRowCursor cursor, int col) - : base(ch, cursor, col) - { - _converter = CreateConverter(type); - } - - protected override void ProcessRow(ref TItem val) - { - Stat.Update(_converter.ToLong(val), MaxVal); - } - - public override object GetStat() - { - long val = Stat.GetCurrentValue(Ch, RowCount, MaxVal); - return _converter.FromLong(val); - } - } - - public sealed class MeanAggregatorAcrossSlots : StatAggregatorAcrossSlots - { - private Converter _converter; - - public MeanAggregatorAcrossSlots(IChannel ch, ColumnType type, IRowCursor cursor, int col) - : base(ch, cursor, col) - { - _converter = CreateConverter(type); - } - - protected override void ProcessValue(ref TItem val) - { - Stat.Update(_converter.ToLong(val), MaxVal); - } - - public override object GetStat() - { - long val = Stat.GetCurrentValue(Ch, ValueCount, MaxVal); - return _converter.FromLong(val); - } - } - - public sealed class MeanAggregatorBySlot : StatAggregatorBySlot - { - private Converter _converter; - - public MeanAggregatorBySlot(IChannel ch, ColumnType type, IRowCursor cursor, int col) - : base(ch, type, cursor, col) - { - _converter = CreateConverter(type); - } - - protected override void ProcessValue(ref TItem val, int slot) - { - Ch.Assert(0 <= slot && slot < Stat.Length); - Stat[slot].Update(_converter.ToLong(val), MaxVal); - } - - public override object GetStat() - { - TItem[] stat = new TItem[Stat.Length]; - for (int slot = 0; slot < stat.Length; slot++) - { - long val = Stat[slot].GetCurrentValue(Ch, RowCount, MaxVal); - stat[slot] = _converter.FromLong(val); - } - return stat; - } - } - - public sealed class MinMaxAggregatorOne : MinMaxAggregatorOne - { - private Converter _converter; - - public MinMaxAggregatorOne(IChannel ch, ColumnType type, IRowCursor cursor, int col, bool returnMax) - : base(ch, cursor, col, returnMax) - { - Stat = ReturnMax ? 
-MaxVal : MaxVal; - _converter = CreateConverter(type); - } - - protected override void ProcessValueMin(ref TItem val) - { - var raw = _converter.ToLong(val); - if (raw < Stat && -MaxVal <= raw) - Stat = raw; - } - - protected override void ProcessValueMax(ref TItem val) - { - var raw = _converter.ToLong(val); - if (raw > Stat) - Stat = raw; - } - - public override object GetStat() - { - return _converter.FromLong(Stat); - } - } - - public sealed class MinMaxAggregatorAcrossSlots : MinMaxAggregatorAcrossSlots - { - private Converter _converter; - - public MinMaxAggregatorAcrossSlots(IChannel ch, ColumnType type, IRowCursor cursor, int col, bool returnMax) - : base(ch, cursor, col, returnMax) - { - Stat = ReturnMax ? -MaxVal : MaxVal; - _converter = CreateConverter(type); - } - - protected override void ProcessValueMin(ref TItem val) - { - var raw = _converter.ToLong(val); - if (raw < Stat && -MaxVal <= raw) - Stat = raw; - } - - protected override void ProcessValueMax(ref TItem val) - { - var raw = _converter.ToLong(val); - if (raw > Stat) - Stat = raw; - } - - public override object GetStat() - { - // If sparsity occurred, fold in a zero. - if (ValueCount > (ulong)ValuesProcessed) - { - TItem def = default; - ProcValueDelegate(ref def); - } - return _converter.FromLong(Stat); - } - } - - public sealed class MinMaxAggregatorBySlot : MinMaxAggregatorBySlot - { - private Converter _converter; - - public MinMaxAggregatorBySlot(IChannel ch, ColumnType type, IRowCursor cursor, int col, bool returnMax) - : base(ch, type, cursor, col, returnMax) - { - long bound = ReturnMax ? 
-MaxVal : MaxVal; - for (int i = 0; i < Stat.Length; i++) - Stat[i] = bound; - - _converter = CreateConverter(type); - } - - protected override void ProcessValueMin(ref TItem val, int slot) - { - Ch.Assert(0 <= slot && slot < Stat.Length); - var raw = _converter.ToLong(val); - if (raw < Stat[slot] && -MaxVal <= raw) - Stat[slot] = raw; - } - - protected override void ProcessValueMax(ref TItem val, int slot) - { - Ch.Assert(0 <= slot && slot < Stat.Length); - var raw = _converter.ToLong(val); - if (raw > Stat[slot]) - Stat[slot] = raw; - } - - public override object GetStat() - { - TItem[] stat = new TItem[Stat.Length]; - // Account for defaults resulting from sparsity. - for (int slot = 0; slot < Stat.Length; slot++) - { - if (GetValuesProcessed(slot) < RowCount) - { - var def = default(TItem); - ProcValueDelegate(ref def, slot); - } - stat[slot] = _converter.FromLong(Stat[slot]); - } - return stat; - } - } - - private static Converter CreateConverter(ColumnType type) - { - Contracts.AssertValue(type); - Contracts.Assert(typeof(TItem) == type.ItemType.RawType); - Converter converter; - if (type.ItemType.IsTimeSpan) - converter = new TSConverter(); - else if (type.ItemType.IsDateTime) - converter = new DTConverter(); - else - { - Contracts.Assert(type.ItemType.RawKind == DataKind.I8); - converter = new I8Converter(); - } - return (Converter)converter; - } - - /// - /// The base class for conversions from types to long. 
- /// - private abstract class Converter - { - } - - private abstract class Converter : Converter - { - public abstract long ToLong(T val); - public abstract T FromLong(long val); - } - - private sealed class I8Converter : Converter - { - public override long ToLong(DvInt8 val) - { - return val.RawValue; - } - - public override DvInt8 FromLong(long val) - { - Contracts.Assert(DvInt8.RawNA != val); - return (DvInt8)val; - } - } - - private sealed class TSConverter : Converter - { - public override long ToLong(DvTimeSpan val) - { - return val.Ticks.RawValue; - } - - public override DvTimeSpan FromLong(long val) - { - Contracts.Assert(DvInt8.RawNA != val); - return new DvTimeSpan(val); - } - } - - private sealed class DTConverter : Converter - { - public override long ToLong(DvDateTime val) - { - return val.Ticks.RawValue; - } - - public override DvDateTime FromLong(long val) - { - Contracts.Assert(0 <= val && val <= DvDateTime.MaxTicks); - return new DvDateTime(val); - } - } - } } } \ No newline at end of file diff --git a/src/Microsoft.ML.Transforms/TermLookupTransform.cs b/src/Microsoft.ML.Transforms/TermLookupTransform.cs index 96acf4aa6e..98dc7a0933 100644 --- a/src/Microsoft.ML.Transforms/TermLookupTransform.cs +++ b/src/Microsoft.ML.Transforms/TermLookupTransform.cs @@ -115,7 +115,7 @@ public static VecValueMap CreateVector(VectorType type) public abstract void Train(IExceptionContext ectx, IRowCursor cursor, int colTerm, int colValue); - public abstract Delegate GetGetter(ValueGetter getSrc); + public abstract Delegate GetGetter(ValueGetter> getSrc); } /// @@ -146,22 +146,18 @@ public override void Train(IExceptionContext ectx, IRowCursor cursor, int colTer ectx.Assert(0 <= colValue && colValue < cursor.Schema.ColumnCount); ectx.Assert(cursor.Schema.GetColumnType(colValue).Equals(Type)); - var getTerm = cursor.GetGetter(colTerm); + var getTerm = cursor.GetGetter>(colTerm); var getValue = cursor.GetGetter(colValue); var terms = new NormStr.Pool(); var values = 
new List(); - DvText term = default(DvText); + ReadOnlyMemory term = default; while (cursor.MoveNext()) { getTerm(ref term); // REVIEW: Should we trim? - term = term.Trim(); - // REVIEW: Should we handle mapping "missing" to something? - if (term.IsNA) - throw ectx.Except("Missing term in lookup data around row: {0}", values.Count); - - var nstr = term.AddToPool(terms); + term = ReadOnlyMemoryUtils.TrimSpaces(term); + var nstr = ReadOnlyMemoryUtils.AddToPool(term, terms); if (nstr.Id != values.Count) throw ectx.Except("Duplicate term in lookup data: '{0}'", nstr); @@ -179,7 +175,7 @@ public override void Train(IExceptionContext ectx, IRowCursor cursor, int colTer /// /// Given the term getter, produce a value getter from this value map. /// - public override Delegate GetGetter(ValueGetter getTerm) + public override Delegate GetGetter(ValueGetter> getTerm) { Contracts.Assert(_terms != null); Contracts.Assert(_values != null); @@ -188,15 +184,15 @@ public override Delegate GetGetter(ValueGetter getTerm) return GetGetterCore(getTerm); } - private ValueGetter GetGetterCore(ValueGetter getTerm) + private ValueGetter GetGetterCore(ValueGetter> getTerm) { - var src = default(DvText); + var src = default(ReadOnlyMemory); return (ref TRes dst) => { getTerm(ref src); - src = src.Trim(); - var nstr = src.FindInPool(_terms); + src = ReadOnlyMemoryUtils.TrimSpaces(src); + var nstr = ReadOnlyMemoryUtils.FindInPool(src, _terms); if (nstr == null) GetMissing(ref dst); else @@ -225,11 +221,13 @@ public OneValueMap(PrimitiveType type) // REVIEW: This uses the fact that standard conversions map NA to NA to get the NA for TRes. // We should probably have a mapping from type to its bad value somewhere, perhaps in Conversions. 
bool identity; - ValueMapper conv; - if (Conversions.Instance.TryGetStandardConversion(TextType.Instance, type, + ValueMapper, TRes> conv; + if (Conversions.Instance.TryGetStandardConversion, TRes>(TextType.Instance, type, out conv, out identity)) { - var bad = DvText.NA; + //Empty string will map to NA for R4 and R8, the only two types that can + //handle missing values. + var bad = String.Empty.AsMemory(); conv(ref bad, ref _badValue); } } @@ -374,9 +372,9 @@ private static IComponentFactory GetLoaderFacto var data = TextLoader.ReadFile(host, txtArgs, new MultiFileSource(filename)); using (var cursor = data.GetRowCursor(c => true)) { - var getTerm = cursor.GetGetter(0); - var getVal = cursor.GetGetter(1); - DvText txt = default(DvText); + var getTerm = cursor.GetGetter>(0); + var getVal = cursor.GetGetter>(1); + ReadOnlyMemory txt = default; using (var ch = host.Start("Creating Text Lookup Loader")) { @@ -405,7 +403,7 @@ private static IComponentFactory GetLoaderFacto //If parsing as a ulong fails, we increment the counter for the non-key values. else { - var term = default(DvText); + var term = default(ReadOnlyMemory); getTerm(ref term); if (countNonKeys < 5) ch.Warning("Term '{0}' in mapping file is mapped to non key value '{1}'", term, txt); @@ -703,7 +701,7 @@ protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, ou Host.Assert(0 <= iinfo && iinfo < Infos.Length); disposer = null; - var getSrc = GetSrcGetter(input, iinfo); + var getSrc = GetSrcGetter>(input, iinfo); return _valueMap.GetGetter(getSrc); } } diff --git a/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs index e1ea2974b3..47c6762d1b 100644 --- a/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs @@ -171,7 +171,7 @@ private void SetMetadata() // Slot names should propagate. 
using (var bldr = md.BuildMetadata(iinfo, Source.Schema, info.Source, MetadataUtils.Kinds.SlotNames)) { - bldr.AddGetter>(MetadataUtils.Kinds.KeyValues, + bldr.AddGetter>>(MetadataUtils.Kinds.KeyValues, MetadataUtils.GetNamesType(_type.ItemType.KeyCount), GetKeyValues); } } @@ -181,7 +181,7 @@ private void SetMetadata() /// /// Get the key values (chars) corresponding to keys in the output columns. /// - private void GetKeyValues(int iinfo, ref VBuffer dst) + private void GetKeyValues(int iinfo, ref VBuffer> dst) { Host.Assert(0 <= iinfo && iinfo < Infos.Length); @@ -211,10 +211,10 @@ private void GetKeyValues(int iinfo, ref VBuffer dst) var values = dst.Values; if (Utils.Size(values) < CharsCount) - values = new DvText[CharsCount]; + values = new ReadOnlyMemory[CharsCount]; for (int i = 0; i < CharsCount; i++) - values[i] = new DvText(keyValuesStr, keyValuesBoundaries[i], keyValuesBoundaries[i + 1]); - dst = new VBuffer(CharsCount, values, dst.Indices); + values[i] = keyValuesStr.AsMemory().Slice(keyValuesBoundaries[i], keyValuesBoundaries[i + 1] - keyValuesBoundaries[i]); + dst = new VBuffer>(CharsCount, values, dst.Indices); } private void AppendCharRepr(char c, StringBuilder bldr) @@ -368,14 +368,14 @@ private ValueGetter> MakeGetterOne(IRow input, int iinfo) Host.AssertValue(input); Host.Assert(Infos[iinfo].TypeSrc.IsText); - var getSrc = GetSrcGetter(input, iinfo); - var src = default(DvText); + var getSrc = GetSrcGetter>(input, iinfo); + var src = default(ReadOnlyMemory); return (ref VBuffer dst) => { getSrc(ref src); - var len = src.HasChars ? (_useMarkerChars ? src.Length + TextMarkersCount : src.Length) : 0; + var len = !src.IsEmpty ? (_useMarkerChars ? 
src.Length + TextMarkersCount : src.Length) : 0; var values = dst.Values; if (len > 0) { @@ -385,8 +385,9 @@ private ValueGetter> MakeGetterOne(IRow input, int iinfo) int index = 0; if (_useMarkerChars) values[index++] = TextStartMarker; + var span = src.Span; for (int ich = 0; ich < src.Length; ich++) - values[index++] = src[ich]; + values[index++] = span[ich]; if (_useMarkerChars) values[index++] = TextEndMarker; Contracts.Assert(index == len); @@ -405,8 +406,8 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) int cv = Infos[iinfo].TypeSrc.VectorSize; Contracts.Assert(cv >= 0); - var getSrc = GetSrcGetter>(input, iinfo); - var src = default(VBuffer); + var getSrc = GetSrcGetter>>(input, iinfo); + var src = default(VBuffer>); ValueGetter> getterWithStartEndSep = (ref VBuffer dst) => { @@ -415,7 +416,7 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) int len = 0; for (int i = 0; i < src.Count; i++) { - if (src.Values[i].HasChars) + if (!src.Values[i].IsEmpty) { len += src.Values[i].Length; if (_useMarkerChars) @@ -432,12 +433,13 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) int index = 0; for (int i = 0; i < src.Count; i++) { - if (!src.Values[i].HasChars) + if (src.Values[i].IsEmpty) continue; if (_useMarkerChars) values[index++] = TextStartMarker; + var span = src.Values[i].Span; for (int ich = 0; ich < src.Values[i].Length; ich++) - values[index++] = src.Values[i][ich]; + values[index++] = span[ich]; if (_useMarkerChars) values[index++] = TextEndMarker; } @@ -455,7 +457,7 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) for (int i = 0; i < src.Count; i++) { - if (src.Values[i].HasChars) + if (!src.Values[i].IsEmpty) { len += src.Values[i].Length; @@ -475,10 +477,10 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) int index = 0; - // VBuffer can be a result of either concatenating text columns together + // ReadOnlyMemory can be a result of either concatenating text columns together // or application 
of word tokenizer before char tokenizer in TextTransform. // - // Considering VBuffer as a single text stream. + // Considering VBuffer as a single text stream. // Therefore, prepend and append start and end markers only once i.e. at the start and at end of vector. // Insert UnitSeparator after every piece of text in the vector. if (_useMarkerChars) @@ -486,16 +488,15 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) for (int i = 0; i < src.Count; i++) { - if (!src.Values[i].HasChars) + if (src.Values[i].IsEmpty) continue; if (i > 0) values[index++] = UnitSeparator; + var span = src.Values[i].Span; for (int ich = 0; ich < src.Values[i].Length; ich++) - { - values[index++] = src.Values[i][ich]; - } + values[index++] = span[ich]; } if (_useMarkerChars) diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs index 0b3d52854f..380399d96a 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs @@ -403,7 +403,7 @@ public static LdaTransform Create(IHostEnvironment env, ModelLoadContext ctx, ID public string GetTopicSummary() { StringWriter writer = new StringWriter(); - VBuffer slotNames = default(VBuffer); + VBuffer> slotNames = default; for (int i = 0; i < _ldas.Length; i++) { GetSlotNames(i, ref slotNames); @@ -427,7 +427,7 @@ public override void Save(ModelSaveContext ctx) ctx.Writer.Write(sizeof(Float)); SaveBase(ctx); Host.Assert(_ldas.Length == Infos.Length); - VBuffer slotNames = default(VBuffer); + VBuffer> slotNames = default; for (int i = 0; i < _ldas.Length; i++) { GetSlotNames(i, ref slotNames); @@ -435,13 +435,13 @@ public override void Save(ModelSaveContext ctx) } } - private void GetSlotNames(int iinfo, ref VBuffer dst) + private void GetSlotNames(int iinfo, ref VBuffer> dst) { Host.Assert(0 <= iinfo && iinfo < Infos.Length); if (Source.Schema.HasSlotNames(Infos[iinfo].Source, Infos[iinfo].TypeSrc.ValueCount)) 
Source.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, Infos[iinfo].Source, ref dst); else - dst = default(VBuffer); + dst = default(VBuffer>); } private static string TestType(ColumnType t) @@ -691,7 +691,7 @@ public LdaState(IExceptionContext ectx, ModelLoadContext ctx) } } - public Action GetTopicSummaryWriter(VBuffer mapping) + public Action GetTopicSummaryWriter(VBuffer> mapping) { Action writeAction; @@ -715,7 +715,7 @@ public Action GetTopicSummaryWriter(VBuffer mapping) writeAction = writer => { - DvText slotName = default(DvText); + ReadOnlyMemory slotName = default; for (int i = 0; i < _ldaTrainer.NumTopic; i++) { KeyValuePair[] topicSummaryVector = _ldaTrainer.GetTopicSummary(i); @@ -733,7 +733,7 @@ public Action GetTopicSummaryWriter(VBuffer mapping) return writeAction; } - public void Save(ModelSaveContext ctx, bool saveText, VBuffer mapping) + public void Save(ModelSaveContext ctx, bool saveText, VBuffer> mapping) { Contracts.AssertValue(ctx); long memBlockSize = 0; diff --git a/src/Microsoft.ML.Transforms/Text/NgramHashTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramHashTransform.cs index 548b80cb8c..8ab64f9da4 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramHashTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramHashTransform.cs @@ -213,7 +213,7 @@ protected override void GetMetadataCore(string kind, int iinfo, ref TVal { if (kind == MetadataUtils.Kinds.SlotNames && _parent._slotNames != null && _parent._slotNames[iinfo].Length > 0) { - MetadataUtils.MetadataGetter> getTerms = _parent.GetTerms; + MetadataUtils.MetadataGetter>> getTerms = _parent.GetTerms; getTerms.Marshal(iinfo, ref value); return; } @@ -323,7 +323,7 @@ private static VersionInfo GetVersionInfo() private readonly Bindings _bindings; private readonly ColInfoEx[] _exes; - private readonly VBuffer[] _slotNames; + private readonly VBuffer>[] _slotNames; private readonly ColumnType[] _slotNamesTypes; private const string RegistrationName = "NgramHash"; @@ -447,7 +447,7 @@ 
private static int GetAndVerifyInvertHashMaxCount(Arguments args, Column col, Co return invertHashMaxCount; } - private void GetTerms(int iinfo, ref VBuffer dst) + private void GetTerms(int iinfo, ref VBuffer> dst) { Host.Assert(0 <= iinfo && iinfo < _exes.Length); Host.Assert(_slotNames[iinfo].Length > 0); @@ -1005,9 +1005,9 @@ public NgramIdFinder Decorate(int iinfo, NgramIdFinder finder) }; } - public VBuffer[] SlotNamesMetadata(out ColumnType[] types) + public VBuffer>[] SlotNamesMetadata(out ColumnType[] types) { - var values = new VBuffer[_iinfoToCollector.Length]; + var values = new VBuffer>[_iinfoToCollector.Length]; types = new ColumnType[_iinfoToCollector.Length]; for (int iinfo = 0; iinfo < _iinfoToCollector.Length; ++iinfo) { diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index 546c46479d..a696f2b092 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -303,7 +303,7 @@ public override void Save(ModelSaveContext ctx) ctx.Writer.Write(sizeof(Float)); SaveBase(ctx); - var ngramsNames = default(VBuffer); + var ngramsNames = default(VBuffer>); for (int i = 0; i < _exes.Length; i++) { _exes[i].Save(ctx); @@ -358,7 +358,7 @@ private void InitColumnTypeAndMetadata(out VectorType[] types, out VectorType[] if (_ngramMaps[iinfo].Count > 0) { slotNamesTypes[iinfo] = new VectorType(TextType.Instance, _ngramMaps[iinfo].Count); - bldr.AddGetter>(MetadataUtils.Kinds.SlotNames, + bldr.AddGetter>>(MetadataUtils.Kinds.SlotNames, slotNamesTypes[iinfo], GetSlotNames); } } @@ -366,7 +366,7 @@ private void InitColumnTypeAndMetadata(out VectorType[] types, out VectorType[] md.Seal(); } - private void GetSlotNames(int iinfo, ref VBuffer dst) + private void GetSlotNames(int iinfo, ref VBuffer> dst) { Host.Assert(0 <= iinfo && iinfo < Infos.Length); Host.Assert(_slotNamesTypes[iinfo] != null); @@ -374,7 +374,7 @@ private void 
GetSlotNames(int iinfo, ref VBuffer dst) var keyCount = Infos[iinfo].TypeSrc.ItemType.KeyCount; Host.Assert(Source.Schema.HasKeyNames(Infos[iinfo].Source, keyCount)); - var unigramNames = new VBuffer(); + var unigramNames = new VBuffer>(); // Get the key values of the unigrams. Source.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, Infos[iinfo].Source, ref unigramNames); @@ -397,13 +397,13 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) // Get the unigrams composing the current ngram. ComposeNgramString(ngram, n, sb, keyCount, unigramNames.GetItemOrDefault); - values[slot] = new DvText(sb.ToString()); + values[slot] = sb.ToString().AsMemory(); } - dst = new VBuffer(ngramCount, values, dst.Indices); + dst = new VBuffer>(ngramCount, values, dst.Indices); } - private delegate void TermGetter(int index, ref DvText term); + private delegate void TermGetter(int index, ref ReadOnlyMemory term); private void ComposeNgramString(uint[] ngram, int count, StringBuilder sb, int keyCount, TermGetter termGetter) { @@ -412,7 +412,7 @@ private void ComposeNgramString(uint[] ngram, int count, StringBuilder sb, int k Host.Assert(keyCount > 0); sb.Clear(); - DvText term = default(DvText); + ReadOnlyMemory term = default; string sep = ""; for (int iterm = 0; iterm < count; iterm++) { @@ -424,7 +424,7 @@ private void ComposeNgramString(uint[] ngram, int count, StringBuilder sb, int k else { termGetter((int)unigram - 1, ref term); - term.AddToStringBuilder(sb); + sb.AppendMemory(term); } } } diff --git a/src/Microsoft.ML.Transforms/Text/StopWordsRemoverTransform.cs b/src/Microsoft.ML.Transforms/Text/StopWordsRemoverTransform.cs index b9ca08fe06..d278326b33 100644 --- a/src/Microsoft.ML.Transforms/Text/StopWordsRemoverTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/StopWordsRemoverTransform.cs @@ -244,7 +244,7 @@ private static VersionInfo GetVersionInfo() private static readonly ColumnType _outputType = new VectorType(TextType.Instance); private static volatile 
NormStr.Pool[] _stopWords; - private static volatile Dictionary _langsDictionary; + private static volatile Dictionary, Language> _langsDictionary; private const Language DefaultLanguage = Language.English; private const string RegistrationName = "StopWordsRemover"; @@ -270,14 +270,14 @@ private static NormStr.Pool[] StopWords } } - private static Dictionary LangsDictionary + private static Dictionary, Language> LangsDictionary { get { if (_langsDictionary == null) { var langsDictionary = Enum.GetValues(typeof(Language)).Cast() - .ToDictionary(lang => new DvText(lang.ToString())); + .ToDictionary(lang => lang.ToString().AsMemory()); Interlocked.CompareExchange(ref _langsDictionary, langsDictionary, null); } @@ -449,16 +449,16 @@ protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, ou var ex = _exes[iinfo]; Language stopWordslang = ex.Lang; - var lang = default(DvText); - var getLang = ex.LangsColIndex >= 0 ? input.GetGetter(ex.LangsColIndex) : null; + var lang = default(ReadOnlyMemory); + var getLang = ex.LangsColIndex >= 0 ? input.GetGetter>(ex.LangsColIndex) : null; - var getSrc = GetSrcGetter>(input, iinfo); - var src = default(VBuffer); + var getSrc = GetSrcGetter>>(input, iinfo); + var src = default(VBuffer>); var buffer = new StringBuilder(); - var list = new List(); + var list = new List>(); - ValueGetter> del = - (ref VBuffer dst) => + ValueGetter>> del = + (ref VBuffer> dst) => { var langToUse = stopWordslang; UpdateLanguage(ref langToUse, getLang, ref lang); @@ -468,10 +468,10 @@ protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, ou for (int i = 0; i < src.Count; i++) { - if (!src.Values[i].HasChars) + if (src.Values[i].IsEmpty) continue; buffer.Clear(); - src.Values[i].AddLowerCaseToStringBuilder(buffer); + ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(src.Values[i].Span, buffer); // REVIEW nihejazi: Consider using a trie for string matching (Aho-Corasick, etc.) 
if (StopWords[(int)langToUse].Get(buffer) == null) @@ -484,13 +484,13 @@ protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, ou return del; } - private void UpdateLanguage(ref Language langToUse, ValueGetter getLang, ref DvText langTxt) + private void UpdateLanguage(ref Language langToUse, ValueGetter> getLang, ref ReadOnlyMemory langTxt) { if (getLang != null) { getLang(ref langTxt); Language lang; - if (!langTxt.IsNA && LangsDictionary.TryGetValue(langTxt, out lang)) + if (LangsDictionary.TryGetValue(langTxt, out lang)) langToUse = lang; } @@ -694,24 +694,24 @@ private void LoadStopWords(IHostEnvironment env, IChannel ch, ArgumentsBase load ch.Warning("Explicit stopwords list specified. Data file arguments will be ignored"); } - var src = default(DvText); + var src = default(ReadOnlyMemory); stopWordsMap = new NormStr.Pool(); var buffer = new StringBuilder(); - var stopwords = new DvText(loaderArgs.Stopwords); - stopwords = stopwords.Trim(); - if (stopwords.HasChars) + var stopwords = loaderArgs.Stopwords.AsMemory(); + stopwords = ReadOnlyMemoryUtils.TrimSpaces(stopwords); + if (!stopwords.IsEmpty) { bool warnEmpty = true; for (bool more = true; more;) { - DvText stopword; - more = stopwords.SplitOne(',', out stopword, out stopwords); - stopword = stopword.Trim(); - if (stopword.HasChars) + ReadOnlyMemory stopword; + more = ReadOnlyMemoryUtils.SplitOne(stopwords, ',', out stopword, out stopwords); + stopword = ReadOnlyMemoryUtils.TrimSpaces(stopword); + if (!stopword.IsEmpty) { buffer.Clear(); - stopword.AddLowerCaseToStringBuilder(buffer); + ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(stopword.Span, buffer); stopWordsMap.Add(buffer); } else if (warnEmpty) @@ -727,12 +727,12 @@ private void LoadStopWords(IHostEnvironment env, IChannel ch, ArgumentsBase load bool warnEmpty = true; foreach (string word in loaderArgs.Stopword) { - var stopword = new DvText(word); - stopword = stopword.Trim(); - if (stopword.HasChars) + var stopword = 
word.AsSpan(); + stopword = stopword.Trim(' '); + if (!stopword.IsEmpty) { buffer.Clear(); - stopword.AddLowerCaseToStringBuilder(buffer); + ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(stopword, buffer); stopWordsMap.Add(buffer); } else if (warnEmpty) @@ -756,14 +756,14 @@ private void LoadStopWords(IHostEnvironment env, IChannel ch, ArgumentsBase load using (var cursor = loader.GetRowCursor(col => col == colSrc)) { bool warnEmpty = true; - var getter = cursor.GetGetter(colSrc); + var getter = cursor.GetGetter>(colSrc); while (cursor.MoveNext()) { getter(ref src); - if (src.HasChars) + if (!src.IsEmpty) { buffer.Clear(); - src.AddLowerCaseToStringBuilder(buffer); + ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(src.Span, buffer); stopWordsMap.Add(buffer); } else if (warnEmpty) @@ -902,7 +902,7 @@ public override void Save(ModelSaveContext ctx) foreach (var nstr in _stopWordsMap) { Host.Assert(nstr.Id == id); - ctx.SaveString(nstr); + ctx.SaveString(nstr.Value); id++; } @@ -928,23 +928,23 @@ protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, ou Host.Assert(Infos[iinfo].TypeSrc.IsVector & Infos[iinfo].TypeSrc.ItemType.IsText); disposer = null; - var getSrc = GetSrcGetter>(input, iinfo); - var src = default(VBuffer); + var getSrc = GetSrcGetter>>(input, iinfo); + var src = default(VBuffer>); var buffer = new StringBuilder(); - var list = new List(); + var list = new List>(); - ValueGetter> del = - (ref VBuffer dst) => + ValueGetter>> del = + (ref VBuffer> dst) => { getSrc(ref src); list.Clear(); for (int i = 0; i < src.Count; i++) { - if (!src.Values[i].HasChars) + if (src.Values[i].IsEmpty) continue; buffer.Clear(); - src.Values[i].AddLowerCaseToStringBuilder(buffer); + ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(src.Values[i].Span, buffer); // REVIEW nihejazi: Consider using a trie for string matching (Aho-Corasick, etc.) 
if (_stopWordsMap.Get(buffer) == null) diff --git a/src/Microsoft.ML.Transforms/Text/TextNormalizerTransform.cs b/src/Microsoft.ML.Transforms/Text/TextNormalizerTransform.cs index 9565b4b445..c96b1511d1 100644 --- a/src/Microsoft.ML.Transforms/Text/TextNormalizerTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/TextNormalizerTransform.cs @@ -25,7 +25,7 @@ namespace Microsoft.ML.Runtime.TextAnalytics { /// /// A text normalization transform that allows normalizing text case, removing diacritical marks, punctuation marks and/or numbers. - /// The transform operates on text input as well as vector of tokens/text (vector of DvText). + /// The transform operates on text input as well as vector of tokens/text (vector of ReadOnlyMemory). /// public sealed class TextNormalizerTransform : OneToOneTransformBase { @@ -76,7 +76,7 @@ public sealed class Arguments } internal const string Summary = "A text normalization transform that allows normalizing text case, removing diacritical marks, punctuation marks and/or numbers." 
+ - " The transform operates on text input as well as vector of tokens/text (vector of DvText)."; + " The transform operates on text input as well as vector of tokens/text (vector of ReadOnlyMemory)."; public const string LoaderSignature = "TextNormalizerTransform"; private static VersionInfo GetVersionInfo() @@ -256,31 +256,31 @@ protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, ou return MakeGetterOne(input, iinfo); } - private ValueGetter MakeGetterOne(IRow input, int iinfo) + private ValueGetter> MakeGetterOne(IRow input, int iinfo) { Contracts.Assert(Infos[iinfo].TypeSrc.IsText); - var getSrc = GetSrcGetter(input, iinfo); + var getSrc = GetSrcGetter>(input, iinfo); Host.AssertValue(getSrc); - var src = default(DvText); + var src = default(ReadOnlyMemory); var buffer = new StringBuilder(); return - (ref DvText dst) => + (ref ReadOnlyMemory dst) => { getSrc(ref src); NormalizeSrc(ref src, ref dst, buffer); }; } - private ValueGetter> MakeGetterVec(IRow input, int iinfo) + private ValueGetter>> MakeGetterVec(IRow input, int iinfo) { - var getSrc = GetSrcGetter>(input, iinfo); + var getSrc = GetSrcGetter>>(input, iinfo); Host.AssertValue(getSrc); - var src = default(VBuffer); + var src = default(VBuffer>); var buffer = new StringBuilder(); - var list = new List(); - var temp = default(DvText); + var list = new List>(); + var temp = default(ReadOnlyMemory); return - (ref VBuffer dst) => + (ref VBuffer> dst) => { getSrc(ref src); list.Clear(); @@ -295,11 +295,11 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) }; } - private void NormalizeSrc(ref DvText src, ref DvText dst, StringBuilder buffer) + private void NormalizeSrc(ref ReadOnlyMemory src, ref ReadOnlyMemory dst, StringBuilder buffer) { Host.AssertValue(buffer); - if (!src.HasChars) + if (src.IsEmpty) { dst = src; return; @@ -307,18 +307,16 @@ private void NormalizeSrc(ref DvText src, ref DvText dst, StringBuilder buffer) buffer.Clear(); - int ichMin; - int ichLim; - 
string text = src.GetRawUnderlyingBufferInfo(out ichMin, out ichLim); - int i = ichMin; - int min = ichMin; - while (i < ichLim) + int i = 0; + int min = 0; + var span = src.Span; + while (i < src.Length) { - char ch = text[i]; + char ch = span[i]; if (!_keepPunctuations && char.IsPunctuation(ch) || !_keepNumbers && char.IsNumber(ch)) { // Append everything before ch and ignore ch. - buffer.Append(text, min, i - min); + buffer.AppendSpan(span.Slice(min, i - min)); min = i + 1; i++; continue; @@ -328,7 +326,7 @@ private void NormalizeSrc(ref DvText src, ref DvText dst, StringBuilder buffer) { if (IsCombiningDiacritic(ch)) { - buffer.Append(text, min, i - min); + buffer.AppendSpan(span.Slice(min, i - min)); min = i + 1; i++; continue; @@ -343,26 +341,26 @@ private void NormalizeSrc(ref DvText src, ref DvText dst, StringBuilder buffer) else if (_case == CaseNormalizationMode.Upper) ch = CharUtils.ToUpperInvariant(ch); - if (ch != text[i]) + if (ch != span[i]) { - buffer.Append(text, min, i - min).Append(ch); + buffer.AppendSpan(span.Slice(min, i - min)).Append(ch); min = i + 1; } i++; } - Host.Assert(i == ichLim); + Host.Assert(i == src.Length); int len = i - min; - if (ichMin == min) + if (min == 0) { Host.Assert(src.Length == len); dst = src; } else { - buffer.Append(text, min, len); - dst = new DvText(buffer.ToString()); + buffer.AppendSpan(span.Slice(min, len)); + dst = buffer.ToString().AsMemory(); } } diff --git a/src/Microsoft.ML.Transforms/Text/TextTransform.cs b/src/Microsoft.ML.Transforms/Text/TextTransform.cs index 7a5cf4bc7b..cc801dd311 100644 --- a/src/Microsoft.ML.Transforms/Text/TextTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/TextTransform.cs @@ -228,7 +228,7 @@ private bool UsesHashExtractors } // If we're performing language auto detection, or either of our extractors aren't hashing then - // we need all the input text concatenated into a single Vect, for the LanguageDetectionTransform + // we need all the input text concatenated into a 
single ReadOnlyMemory, for the LanguageDetectionTransform // to operate on the entire text vector, and for the Dictionary feature extractor to build its bound dictionary // correctly. public bool NeedInitialSourceColumnConcatTransform diff --git a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs index 1340855547..4341afbb92 100644 --- a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs @@ -182,7 +182,7 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV } /// - /// A transform that turns a collection of tokenized text (vector of DvText), or vectors of keys into numerical + /// A transform that turns a collection of tokenized text (vector of ReadOnlyMemory), or vectors of keys into numerical /// feature vectors. The feature vectors are counts of ngrams (sequences of consecutive *tokens* -words or keys- /// of length 1-n). /// @@ -275,7 +275,7 @@ public sealed class Arguments : ArgumentsBase public Column[] Column; } - internal const string Summary = "A transform that turns a collection of tokenized text (vector of DvText), or vectors of keys into numerical " + + internal const string Summary = "A transform that turns a collection of tokenized text ReadOnlyMemory, or vectors of keys into numerical " + "feature vectors. The feature vectors are counts of ngrams (sequences of consecutive *tokens* -words or keys- of length 1-n)."; internal const string LoaderSignature = "NgramExtractor"; diff --git a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs index 270a384379..5a5b382d75 100644 --- a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. 
+// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. @@ -114,12 +114,9 @@ public void AddWordVector(IChannel ch, string word, float[] wordVector) } } - public bool GetWordVector(ref DvText word, float[] wordVector) + public bool GetWordVector(ref ReadOnlyMemory word, float[] wordVector) { - if (word.IsNA) - return false; - string rawWord = word.GetRawUnderlyingBufferInfo(out int ichMin, out int ichLim); - NormStr str = _pool.Get(rawWord, ichMin, ichLim); + NormStr str = _pool.Get(word); if (str != null) { _wordVectors.CopyTo(str.Id * Dimension, wordVector, Dimension); @@ -339,15 +336,14 @@ protected override Delegate MakeGetter(IRow input, int iinfo, out Action dispose private ValueGetter> GetGetterVec(IRow input, int iinfo) { Host.AssertValue(input); - Host.Assert(0 <= iinfo && iinfo < _parent.ColumnPairs.Length); var colType = input.Schema.GetColumnType(ColMapNewToOld[iinfo]); Host.Assert(colType.IsVector); Host.Assert(colType.ItemType.IsText); - var srcGetter = input.GetGetter>(ColMapNewToOld[iinfo]); - var src = default(VBuffer); + var srcGetter = input.GetGetter>>(ColMapNewToOld[iinfo]); + var src = default(VBuffer>); int dimension = _parent._currentVocab.Dimension; float[] wordVector = new float[_parent._currentVocab.Dimension]; diff --git a/src/Microsoft.ML.Transforms/Text/WordHashBagTransform.cs b/src/Microsoft.ML.Transforms/Text/WordHashBagTransform.cs index 9417ea6260..b51cccd220 100644 --- a/src/Microsoft.ML.Transforms/Text/WordHashBagTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordHashBagTransform.cs @@ -177,7 +177,7 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV } /// - /// A transform that turns a collection of tokenized text (vector of DvText) into numerical feature vectors + /// A transform that turns a collection of tokenized text (vector of 
ReadOnlyMemory) into numerical feature vectors /// using the hashing trick. /// public static class NgramHashExtractorTransform @@ -320,7 +320,7 @@ public sealed class Arguments : ArgumentsBase public Column[] Column; } - internal const string Summary = "A transform that turns a collection of tokenized text (vector of DvText) into numerical feature vectors using the hashing trick."; + internal const string Summary = "A transform that turns a collection of tokenized text (vector of ReadOnlyMemory) into numerical feature vectors using the hashing trick."; internal const string LoaderSignature = "NgramHashExtractor"; diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs index 2e38af34e2..35bfbd925c 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs @@ -35,7 +35,7 @@ public interface ITokenizeTransform : IDataTransform { } - // The input for this transform is a DvText or a vector of DvTexts, and its output is a vector of DvTexts, + // The input for this transform is a ReadOnlyMemory or a vector of ReadOnlyMemory, and its output is a vector of ReadOnlyMemory, // corresponding to the tokens in the input text, split using a set of user specified separator characters. // Empty strings and strings containing only spaces are dropped. /// @@ -160,7 +160,7 @@ public DelimitedTokenizeTransform(IHostEnvironment env, Arguments args, IDataVie : base(env, RegistrationName, Contracts.CheckRef(args, nameof(args)).Column, input, TestIsTextItem) { - // REVIEW: Need to decide whether to inject an NA token between slots in VBuffer inputs. + // REVIEW: Need to decide whether to inject an NA token between slots in ReadOnlyMemory inputs. 
Host.AssertNonEmpty(Infos); Host.Assert(Infos.Length == Utils.Size(args.Column)); @@ -182,7 +182,7 @@ public DelimitedTokenizeTransform(IHostEnvironment env, TokenizeArguments args, Host.CheckValue(args, nameof(args)); Host.CheckUserArg(Utils.Size(columns) > 0, nameof(Arguments.Column)); - // REVIEW: Need to decide whether to inject an NA token between slots in VBuffer inputs. + // REVIEW: Need to decide whether to inject an NA token between slots in ReadOnlyMemory inputs. Host.AssertNonEmpty(Infos); Host.Assert(Infos.Length == Utils.Size(columns)); @@ -294,18 +294,18 @@ protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, ou return MakeGetterVec(input, iinfo); } - private ValueGetter> MakeGetterOne(IRow input, int iinfo) + private ValueGetter>> MakeGetterOne(IRow input, int iinfo) { Host.AssertValue(input); Host.Assert(Infos[iinfo].TypeSrc.IsText); - var getSrc = GetSrcGetter(input, iinfo); - var src = default(DvText); - var terms = new List(); + var getSrc = GetSrcGetter>(input, iinfo); + var src = default(ReadOnlyMemory); + var terms = new List>(); var separators = _exes[iinfo].Separators; return - (ref VBuffer dst) => + (ref VBuffer> dst) => { getSrc(ref src); terms.Clear(); @@ -316,15 +316,15 @@ private ValueGetter> MakeGetterOne(IRow input, int iinfo) if (terms.Count > 0) { if (Utils.Size(values) < terms.Count) - values = new DvText[terms.Count]; + values = new ReadOnlyMemory[terms.Count]; terms.CopyTo(values); } - dst = new VBuffer(terms.Count, values, dst.Indices); + dst = new VBuffer>(terms.Count, values, dst.Indices); }; } - private ValueGetter> MakeGetterVec(IRow input, int iinfo) + private ValueGetter>> MakeGetterVec(IRow input, int iinfo) { Host.AssertValue(input); Host.Assert(Infos[iinfo].TypeSrc.IsVector); @@ -333,13 +333,13 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) int cv = Infos[iinfo].TypeSrc.VectorSize; Contracts.Assert(cv >= 0); - var getSrc = GetSrcGetter>(input, iinfo); - var src = default(VBuffer); 
- var terms = new List(); + var getSrc = GetSrcGetter>>(input, iinfo); + var src = default(VBuffer>); + var terms = new List>(); var separators = _exes[iinfo].Separators; return - (ref VBuffer dst) => + (ref VBuffer> dst) => { getSrc(ref src); terms.Clear(); @@ -351,39 +351,39 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) if (terms.Count > 0) { if (Utils.Size(values) < terms.Count) - values = new DvText[terms.Count]; + values = new ReadOnlyMemory[terms.Count]; terms.CopyTo(values); } - dst = new VBuffer(terms.Count, values, dst.Indices); + dst = new VBuffer>(terms.Count, values, dst.Indices); }; } - private void AddTerms(DvText txt, char[] separators, List terms) + private void AddTerms(ReadOnlyMemory txt, char[] separators, List> terms) { Host.AssertNonEmpty(separators); var rest = txt; if (separators.Length > 1) { - while (rest.HasChars) + while (!rest.IsEmpty) { - DvText term; - rest.SplitOne(separators, out term, out rest); - term = term.Trim(); - if (term.HasChars) + ReadOnlyMemory term; + ReadOnlyMemoryUtils.SplitOne(rest, separators, out term, out rest); + term = ReadOnlyMemoryUtils.TrimSpaces(term); + if (!term.IsEmpty) terms.Add(term); } } else { var separator = separators[0]; - while (rest.HasChars) + while (!rest.IsEmpty) { - DvText term; - rest.SplitOne(separator, out term, out rest); - term = term.Trim(); - if (term.HasChars) + ReadOnlyMemory term; + ReadOnlyMemoryUtils.SplitOne(rest, separator, out term, out rest); + term = ReadOnlyMemoryUtils.TrimSpaces(term); + if (!term.IsEmpty) terms.Add(term); } } diff --git a/src/Microsoft.ML.Transforms/Text/doc.xml b/src/Microsoft.ML.Transforms/Text/doc.xml index 1a39b0b788..e3495e4ca9 100644 --- a/src/Microsoft.ML.Transforms/Text/doc.xml +++ b/src/Microsoft.ML.Transforms/Text/doc.xml @@ -46,8 +46,8 @@ This transform splits the text into words using the separator character(s). 
- The input for this transform is a DvText or a vector of DvTexts, - and its output is a vector of DvTexts, corresponding to the tokens in the input text. + The input for this transform is a ReadOnlyMemory or a vector of ReadOnlyMemory, + and its output is a vector of ReadOnlyMemory, corresponding to the tokens in the input text. The output is generated by splitting the input text, using a set of user specified separator characters. Empty strings and strings containing only spaces are dropped. This transform is not typically used on its own, but it is one of the transforms composing the Text Featurizer. diff --git a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.PAVcalibration.txt b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.PAVcalibration.txt index eb8fd43aae..730e9c8fa9 100644 --- a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.PAVcalibration.txt +++ b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.PAVcalibration.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 3.64133739 1 0 1 35 0 -2.796535 1E-15 1.4415419267167138E-15 0 37 0 -1.65404248 1E-15 1.4415419267167138E-15 0 -40 0 +40 0 ? ? ? 0 41 1 1.04337406 0.8947368 0.16046469748481262 1 44 1 4.33966541 1 0 1 45 0 -2.89273548 1E-15 1.4415419267167138E-15 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -2.35179329 1E-15 1.4415419267167138E-15 0 141 0 -2.904073 1E-15 1.4415419267167138E-15 0 144 0 -2.796535 1E-15 1.4415419267167138E-15 0 -145 0 +145 0 ? ? ? 
0 147 0 -2.463921 1E-15 1.4415419267167138E-15 0 150 0 -2.8614285 1E-15 1.4415419267167138E-15 0 151 1 3.17632246 1 0 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -3.01920986 1E-15 1.4415419267167138E-15 0 156 0 -2.49565363 1E-15 1.4415419267167138E-15 0 161 0 -2.30924 1E-15 1.4415419267167138E-15 0 -164 0 +164 0 ? ? ? 0 167 1 2.38255262 0.8947368 0.16046469748481262 1 169 0 -3.03097248 1E-15 1.4415419267167138E-15 0 171 0 -2.804134 1E-15 1.4415419267167138E-15 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 7.227234 1 0 1 247 1 2.672493 0.8947368 0.16046469748481262 1 248 0 -1.711307 1E-15 1.4415419267167138E-15 0 -249 0 +249 0 ? ? ? 0 250 0 -2.60319138 1E-15 1.4415419267167138E-15 0 252 0 2.992918 0.8947368 3.2479272984652883 1 254 1 5.35164165 1 0 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -2.804134 1E-15 1.4415419267167138E-15 0 271 0 -2.34358668 1E-15 1.4415419267167138E-15 0 272 1 2.73419476 0.8947368 0.16046469748481262 1 -275 0 +275 0 ? ? ? 0 276 0 -2.68139815 1E-15 1.4415419267167138E-15 0 277 0 -2.91167164 1E-15 1.4415419267167138E-15 0 278 0 -2.804134 1E-15 1.4415419267167138E-15 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -2.804134 1E-15 1.4415419267167138E-15 0 293 1 3.413344 1 0 1 296 0 1.47483253 0.8947368 3.2479272984652883 1 -297 0 +297 0 ? ? ? 0 299 1 3.53376913 1 0 1 300 1 3.78027344 1 0 1 301 0 -2.804134 1E-15 1.4415419267167138E-15 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 3.45851088 1 0 1 317 1 5.48386 1 0 1 319 0 1.44604874 0.8947368 3.2479272984652883 1 -321 0 +321 0 ? ? ? 
0 323 1 3.36483288 1 0 1 327 0 -2.91167164 1E-15 1.4415419267167138E-15 0 328 1 3.03614 0.8947368 0.16046469748481262 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 8.167599 1 0 1 613 0 -2.39343452 1E-15 1.4415419267167138E-15 0 614 0 -2.85382986 1E-15 1.4415419267167138E-15 0 -617 0 +617 0 ? ? ? 0 618 0 -2.56626129 1E-15 1.4415419267167138E-15 0 619 0 -2.45112467 1E-15 1.4415419267167138E-15 0 621 0 -0.5349846 0.2 0.32192810026182023 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -2.27471757 1E-15 1.4415419267167138E-15 0 19 0 -2.00358748 1E-15 1.4415419267167138E-15 0 22 0 -2.5426836 1E-15 1.4415419267167138E-15 0 -23 1 +23 1 ? ? ? 0 24 0 -2.68141246 1E-15 1.4415419267167138E-15 0 26 0 -2.33200574 1E-15 1.4415419267167138E-15 0 27 0 -2.27155375 1E-15 1.4415419267167138E-15 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -2.546271 1E-15 1.4415419267167138E-15 0 135 0 -1.52827668 1E-15 1.4415419267167138E-15 0 136 0 -2.40711856 1E-15 1.4415419267167138E-15 0 -139 0 +139 0 ? ? ? 0 140 0 -2.67508459 1E-15 1.4415419267167138E-15 0 142 1 2.1587286 0.9117647 0.13326656969825684 1 143 0 -1.54474568 1E-15 1.4415419267167138E-15 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -1.83878517 1E-15 1.4415419267167138E-15 0 155 1 3.237853 0.9117647 0.13326656969825684 1 157 0 -2.54584742 1E-15 1.4415419267167138E-15 0 -158 0 +158 0 ? ? ? 0 159 1 6.77223158 1 0 1 160 1 5.42115831 1 0 1 162 0 -2.41028237 1E-15 1.4415419267167138E-15 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.187345 1 0 1 232 0 0.397937775 0.714285731 1.8073550080489322 1 234 0 -1.183644 1E-15 1.4415419267167138E-15 0 -235 0 +235 0 ? ? ? 
0 236 1 5.6324296 1 0 1 238 1 6.77774 1 0 1 243 0 -1.01514125 1E-15 1.4415419267167138E-15 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 7.8132205 1 0 1 287 0 -2.546271 1E-15 1.4415419267167138E-15 0 289 1 5.25516939 1 0 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 4.77293825 1 0 1 298 0 -1.80093193 1E-15 1.4415419267167138E-15 0 302 1 7.77011251 1 0 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -2.68141246 1E-15 1.4415419267167138E-15 0 310 0 -2.68183613 1E-15 1.4415419267167138E-15 0 313 0 -2.94621468 1E-15 1.4415419267167138E-15 0 -315 0 +315 0 ? ? ? 0 318 0 -2.69217515 1E-15 1.4415419267167138E-15 0 320 1 3.800508 0.9444444 0.082462200658479604 1 322 0 -2.41028237 1E-15 1.4415419267167138E-15 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -2.81381369 1E-15 1.4415419267167138E-15 0 408 0 -1.46855927 1E-15 1.4415419267167138E-15 0 410 0 -2.81381369 1E-15 1.4415419267167138E-15 0 -411 0 +411 0 ? ? ? 0 412 1 4.83788157 1 0 1 417 0 -2.81381369 1E-15 1.4415419267167138E-15 0 420 0 -1.04175115 1E-15 1.4415419267167138E-15 0 diff --git a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.calibrateRandom.txt b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.calibrateRandom.txt index 3b51cd60d5..689d51fcc1 100644 --- a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.calibrateRandom.txt +++ b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.calibrateRandom.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 3.64133739 0.9844741 0.022574810341618422 1 35 0 -2.796535 0.0199539755 0.029078592776682857 0 37 0 -1.65404248 0.0782266259 0.11751599963260938 0 -40 0 +40 0 ? ? ? 
0 41 1 1.04337406 0.7116856 0.49068805388019454 1 44 1 4.33966541 0.9934526 0.0094769477341525628 1 45 0 -2.89273548 0.0177341755 0.025814589979333783 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -2.35179329 0.03427408 0.050314293386144675 0 141 0 -2.904073 0.0174890943 0.025454673744601461 0 144 0 -2.796535 0.0199539755 0.029078592776682857 0 -145 0 +145 0 ? ? ? 0 147 0 -2.463921 0.0299276374 0.04383572570114834 0 150 0 -2.8614285 0.01842858 0.026834852844454766 0 151 1 3.17632246 0.9725775 0.040114860620962978 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -3.01920986 0.0151813291 0.022069981158425081 0 156 0 -2.49565363 0.0287977811 0.042156377165560456 0 161 0 -2.30924 0.0360781476 0.053011906638395674 0 -164 0 +164 0 ? ? ? 0 167 1 2.38255262 0.9293544 0.10569919063924076 1 169 0 -3.03097248 0.0149631584 0.021750410734051751 0 171 0 -2.804134 0.0197691489 0.028806541052408416 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 7.227234 0.999821365 0.00025773902362411926 1 247 1 2.672493 0.9497448 0.074388155670230882 1 248 0 -1.711307 0.07322063 0.10970216917333013 0 -249 0 +249 0 ? ? ? 0 250 0 -2.60319138 0.0252686124 0.036923393165046718 0 252 0 2.992918 0.9657571 4.8680501214754637 1 254 1 5.35164165 0.998142242 0.0026826702788205732 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -2.804134 0.0197691489 0.028806541052408416 0 271 0 -2.34358668 0.0346150957 0.050823827322496622 0 272 1 2.73419476 0.9532994 0.068998701675288501 1 -275 0 +275 0 ? ? ? 0 276 0 -2.68139815 0.0229703225 0.033525709865030048 0 277 0 -2.91167164 0.0173267 0.025216237008576704 0 278 0 -2.804134 0.0197691489 0.028806541052408416 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -2.804134 0.0197691489 0.028806541052408416 0 293 1 3.413344 0.9794621 0.029938444898768346 1 296 0 1.47483253 0.8088676 2.3873555377194497 1 -297 0 +297 0 ? ? ? 
0 299 1 3.53376913 0.982280254 0.025793396317687449 1 300 1 3.78027344 0.986915946 0.019000876915907086 1 301 0 -2.804134 0.0197691489 0.028806541052408416 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 3.45851088 0.980567157 0.028311653515975352 1 317 1 5.48386 0.9984247 0.0022744566762884174 1 319 0 1.44604874 0.803245664 2.3455326626007635 1 -321 0 +321 0 ? ? ? 0 323 1 3.36483288 0.978206754 0.031788669528507826 1 327 0 -2.91167164 0.0173267 0.025216237008576704 0 328 1 3.03614 0.9674988 0.047668253892284254 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 8.167599 0.9999448 7.9630164919418245E-05 1 613 0 -2.39343452 0.03259308 0.047805235639227908 0 614 0 -2.85382986 0.0186011065 0.027088450049153875 0 -617 0 +617 0 ? ? ? 0 618 0 -2.56626129 0.02643034 0.038643886518904186 0 619 0 -2.45112467 0.0303953178 0.044531429350354604 0 621 0 -0.5349846 0.255690753 0.42602593625059765 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -2.27471757 0.0249303821 0.036422867055812318 0 19 0 -2.00358748 0.03410183 0.050056992617068342 0 22 0 -2.5426836 0.0182464458 0.026567179066779304 0 -23 1 +23 1 ? ? ? 0 24 0 -2.68141246 0.015512228 0.022554807909292037 0 26 0 -2.33200574 0.02332543 0.034050161954667077 0 27 0 -2.27155375 0.0250220858 0.036558556406276949 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -2.546271 0.0181701127 0.026455011357386121 0 135 0 -1.52827668 0.0585265532 0.087007689086403689 0 136 0 -2.40711856 0.0213731956 0.031169296396687675 0 -139 0 +139 0 ? ? ? 
0 140 0 -2.67508459 0.015627671 0.022723991158114246 0 142 1 2.1587286 0.83348304 0.26277525125659912 1 143 0 -1.54474568 0.05745574 0.085367730161702821 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -1.83878517 0.0411879122 0.060679997683715341 0 155 1 3.237853 0.947596252 0.077655602601770859 1 157 0 -2.54584742 0.0181791112 0.026468233670711376 0 -158 0 +158 0 ? ? ? 0 159 1 6.77223158 0.999177039 0.0011877710421138682 1 160 1 5.42115831 0.995904 0.0059213730758132374 1 162 0 -2.41028237 0.0212945715 0.031053393109467146 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.187345 0.9945968 0.0078163354020312284 1 232 0 0.397937775 0.381006956 0.69200489802945642 1 234 0 -1.183644 0.085664086 0.12920380666492423 0 -235 0 +235 0 ? ? ? 0 236 1 5.6324296 0.9968118 0.0046069378176225089 1 238 1 6.77774 0.9991824 0.0011800254692933552 1 243 0 -1.01514125 0.102734588 0.15639329612707187 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 7.8132205 0.999761462 0.00034417833577817593 1 287 0 -2.546271 0.0181701127 0.026455011357386121 0 289 1 5.25516939 0.9950138 0.0072115986401722143 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 4.77293825 0.9911819 0.012778237651616852 1 298 0 -1.80093193 0.0430044457 0.063415872213561694 0 302 1 7.77011251 0.999748945 0.00036224093577622284 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -2.68141246 0.015512228 0.022554807909292037 0 310 0 -2.68183613 0.0155045288 0.022543525273161929 0 313 0 -2.94621468 0.0113662509 0.016491938164477051 0 -315 0 +315 0 ? ? ? 
0 318 0 -2.69217515 0.0153178023 0.022269919361612874 0 320 1 3.800508 0.9724724 0.040270746291192143 1 322 0 -2.41028237 0.0212945715 0.031053393109467146 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -2.81381369 0.0132805621 0.019288165348886492 0 408 0 -1.46855927 0.06256822 0.093214386421141548 0 410 0 -2.81381369 0.0132805621 0.019288165348886492 0 -411 0 +411 0 ? ? ? 0 412 1 4.83788157 0.991832554 0.011831516022545778 1 417 0 -2.81381369 0.0132805621 0.019288165348886492 0 420 0 -1.04175115 0.09985152 0.15176509864238413 0 diff --git a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.nocalibration.txt b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.nocalibration.txt index 09a2fb9aa7..e51f951dee 100644 --- a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.nocalibration.txt +++ b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.nocalibration.txt @@ -14,7 +14,7 @@ Instance Label Score Assigned 32 1 3.64133739 1 35 0 -2.796535 0 37 0 -1.65404248 0 -40 0 +40 0 ? 0 41 1 1.04337406 1 44 1 4.33966541 1 45 0 -2.89273548 0 @@ -76,7 +76,7 @@ Instance Label Score Assigned 138 0 -2.35179329 0 141 0 -2.904073 0 144 0 -2.796535 0 -145 0 +145 0 ? 0 147 0 -2.463921 0 150 0 -2.8614285 0 151 1 3.17632246 1 @@ -84,7 +84,7 @@ Instance Label Score Assigned 154 0 -3.01920986 0 156 0 -2.49565363 0 161 0 -2.30924 0 -164 0 +164 0 ? 0 167 1 2.38255262 1 169 0 -3.03097248 0 171 0 -2.804134 0 @@ -130,7 +130,7 @@ Instance Label Score Assigned 246 1 7.227234 1 247 1 2.672493 1 248 0 -1.711307 0 -249 0 +249 0 ? 0 250 0 -2.60319138 0 252 0 2.992918 1 254 1 5.35164165 1 @@ -144,7 +144,7 @@ Instance Label Score Assigned 269 0 -2.804134 0 271 0 -2.34358668 0 272 1 2.73419476 1 -275 0 +275 0 ? 
0 276 0 -2.68139815 0 277 0 -2.91167164 0 278 0 -2.804134 0 @@ -158,7 +158,7 @@ Instance Label Score Assigned 291 0 -2.804134 0 293 1 3.413344 1 296 0 1.47483253 1 -297 0 +297 0 ? 0 299 1 3.53376913 1 300 1 3.78027344 1 301 0 -2.804134 0 @@ -172,7 +172,7 @@ Instance Label Score Assigned 316 1 3.45851088 1 317 1 5.48386 1 319 0 1.44604874 1 -321 0 +321 0 ? 0 323 1 3.36483288 1 327 0 -2.91167164 0 328 1 3.03614 1 @@ -318,7 +318,7 @@ Instance Label Score Assigned 612 1 8.167599 1 613 0 -2.39343452 0 614 0 -2.85382986 0 -617 0 +617 0 ? 0 618 0 -2.56626129 0 619 0 -2.45112467 0 621 0 -0.5349846 0 @@ -375,7 +375,7 @@ Instance Label Score Assigned 17 0 -2.27471757 0 19 0 -2.00358748 0 22 0 -2.5426836 0 -23 1 +23 1 ? 0 24 0 -2.68141246 0 26 0 -2.33200574 0 27 0 -2.27155375 0 @@ -425,7 +425,7 @@ Instance Label Score Assigned 134 0 -2.546271 0 135 0 -1.52827668 0 136 0 -2.40711856 0 -139 0 +139 0 ? 0 140 0 -2.67508459 0 142 1 2.1587286 1 143 0 -1.54474568 0 @@ -435,7 +435,7 @@ Instance Label Score Assigned 153 0 -1.83878517 0 155 1 3.237853 1 157 0 -2.54584742 0 -158 0 +158 0 ? 0 159 1 6.77223158 1 160 1 5.42115831 1 162 0 -2.41028237 0 @@ -474,7 +474,7 @@ Instance Label Score Assigned 231 1 5.187345 1 232 0 0.397937775 1 234 0 -1.183644 0 -235 0 +235 0 ? 0 236 1 5.6324296 1 238 1 6.77774 1 243 0 -1.01514125 0 @@ -496,8 +496,8 @@ Instance Label Score Assigned 286 1 7.8132205 1 287 0 -2.546271 0 289 1 5.25516939 1 -292 1 -294 0 +292 1 ? 0 +294 0 ? 0 295 1 4.77293825 1 298 0 -1.80093193 0 302 1 7.77011251 1 @@ -506,7 +506,7 @@ Instance Label Score Assigned 307 0 -2.68141246 0 310 0 -2.68183613 0 313 0 -2.94621468 0 -315 0 +315 0 ? 0 318 0 -2.69217515 0 320 1 3.800508 1 322 0 -2.41028237 0 @@ -551,7 +551,7 @@ Instance Label Score Assigned 407 0 -2.81381369 0 408 0 -1.46855927 0 410 0 -2.81381369 0 -411 0 +411 0 ? 
0 412 1 4.83788157 1 417 0 -2.81381369 0 420 0 -1.04175115 0 diff --git a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.txt b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.txt index 693440edc2..7664c7d409 100644 --- a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 2.35438 0.996385 0.0052248235129257635 1 35 0 -1.83556986 0.00724350661 0.010488202773633042 0 37 0 -0.8828192 0.07420456 0.11123463299619094 0 -40 0 +40 0 ? ? ? 0 41 1 0.6366718 0.7855496 0.34822575879653411 1 44 1 2.72072053 0.998558342 0.002081372862402793 1 45 0 -1.87994158 0.00648348359 0.0093841435784006184 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -1.51871562 0.0159320254 0.02317012154973026 0 141 0 -1.93865037 0.00559835136 0.0080994066187532153 0 144 0 -1.83556986 0.00724350661 0.010488202773633042 0 -145 0 +145 0 ? ? ? 0 147 0 -1.69274938 0.010342177 0.014998299390129487 0 150 0 -1.91297352 0.00596963847 0.0086381769438633477 0 151 1 1.55168462 0.973401248 0.038893469912665984 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -2.0423522 0.004318493 0.0062437599081359638 0 156 0 -1.73049688 0.009414184 0.013646131293487658 0 161 0 -1.46562588 0.0181668717 0.026450249055215744 0 -164 0 +164 0 ? ? ? 0 167 1 2.31870866 0.99604696 0.0057143326416850991 1 169 0 -2.041339 0.00432946626 0.0062596597963016712 0 171 0 -1.8361913 0.00723227439 0.010471879968765333 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 3.8998096 0.9999256 0.00010732116581602008 1 247 1 0.9928844 0.8997386 0.15242216136874395 1 248 0 -1.22778583 0.03256034 0.047756414470681303 0 -249 0 +249 0 ? ? ? 
0 250 0 -1.83357728 0.00727963867 0.010540711608751351 0 252 0 1.37199 0.958826959 4.6021561858199664 1 254 1 2.79375386 0.9988 0.0017323029622904203 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -1.8361913 0.00723227439 0.010471879968765333 0 271 0 -1.42138422 0.0202620383 0.02953215377914243 0 272 1 1.10222125 0.921965 0.11721610663727763 1 -275 0 +275 0 ? ? ? 0 276 0 -1.73186815 0.009382072 0.013599363941350218 0 277 0 -1.93927169 0.00558965746 0.0080867934194779039 0 278 0 -1.8361913 0.00723227439 0.010471879968765333 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -1.8361913 0.00723227439 0.010471879968765333 0 293 1 1.65592456 0.9794098 0.030015442677317709 1 296 0 0.553571463 0.748245 1.9899076684215853 1 -297 0 +297 0 ? ? ? 0 299 1 2.001486 0.991262555 0.012660861084033199 1 300 1 2.09417748 0.9930671 0.010036913891508983 1 301 0 -1.8361913 0.00723227439 0.010471879968765333 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 1.47151637 0.9676506 0.047441894201880302 1 317 1 3.06524873 0.999393463 0.00087531320929783803 1 319 0 0.8519552 0.8629285 2.8669995609516308 1 -321 0 +321 0 ? ? ? 0 323 1 1.75474 0.9838682 0.023463058122578043 1 327 0 -1.93927169 0.00558965746 0.0080867934194779039 0 328 1 1.43182349 0.964374959 0.052333903838185522 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 5.09431458 0.9999963 5.3314720279485219E-06 1 613 0 -1.63692176 0.0118829105 0.017246087060301379 0 614 0 -1.9123522 0.00597891957 0.0086516472088849234 0 -617 0 +617 0 ? ? ? 
0 618 0 -1.62816632 0.0121443039 0.017627783540922228 0 619 0 -1.52446461 0.0157068819 0.022840087163427197 0 621 0 -0.188778639 0.31474337 0.54528371301135603 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -1.57970572 0.0144622317 0.021016934775320784 0 19 0 -1.32506859 0.0253080986 0.036981837697027814 0 22 0 -1.76987159 0.009491567 0.01375883626140059 0 -23 1 +23 1 ? ? ? 0 24 0 -1.96166122 0.00619609 0.0089668769102733102 0 26 0 -1.67495286 0.0117150256 0.017000988488892345 0 27 0 -1.51523459 0.0166727714 0.024256502854015347 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -1.75784481 0.009748338 0.014132877086598956 0 135 0 -1.07198834 0.04377895 0.06458392745656516 0 136 0 -1.64255309 0.0125861792 0.018273256294688772 0 -139 0 +139 0 ? ? ? 0 140 0 -1.832719 0.008254912 0.01195874763978641 0 142 1 1.20702124 0.8832422 0.17911900782229345 1 143 0 -1.436229 0.0198381469 0.028908095216224419 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -1.37983692 0.0224505328 0.032758385480089913 0 155 1 1.24272728 0.8912444 0.16610696969487637 1 157 0 -1.83434272 0.008225175 0.011915489581813862 0 -158 0 +158 0 ? ? ? 0 159 1 4.09360933 0.999795 0.00029575448112624953 1 160 1 3.04705143 0.997864842 0.0030836744978663621 1 162 0 -1.70702422 0.0109114433 0.015828398337213719 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 2.80366325 0.996321738 0.0053163942348769758 1 232 0 0.248882532 0.46910888 0.91351208341864276 1 234 0 -0.8223932 0.0741594 0.11116426368346338 0 -235 0 +235 0 ? ? ? 
0 236 1 3.65436459 0.999451637 0.00079133718371403176 1 238 1 4.208801 0.99984163 0.00022849704563499302 1 243 0 -1.18483961 0.0343320966 0.050400968413634589 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 4.869809 0.999964 5.1939695512002597E-05 1 287 0 -1.75784481 0.009748338 0.014132877086598956 0 289 1 2.8387928 0.996599257 0.0049145964567919058 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 2.21859837 0.9864867 0.019628533461458434 1 298 0 -1.053101 0.0455854833 0.067312108231381534 0 302 1 4.847371 0.999962151 5.4605525036338512E-05 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -1.96166122 0.00619609 0.0089668769102733102 0 310 0 -1.88516331 0.007346285 0.010637570327263628 0 313 0 -2.087356 0.00468213623 0.0067707579292392971 0 -315 0 +315 0 ? ? ? 0 318 0 -1.9255811 0.006714394 0.00971948958962956 0 320 1 1.98424911 0.9773634 0.03303300364958086 1 322 0 -1.70702422 0.0109114433 0.015828398337213719 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -2.02450848 0.00538646942 0.0077920373965741406 0 408 0 -1.276682 0.0281251986 0.041157619505191562 0 410 0 -2.02450848 0.00538646942 0.0077920373965741406 0 -411 0 +411 0 ? ? ? 
0 412 1 2.820312 0.996455967 0.0051220399276606741 1 417 0 -2.02450848 0.00538646942 0.0077920373965741406 0 420 0 -1.02720916 0.0481775925 0.071235676455785449 0 diff --git a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.PAVcalibration.txt b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.PAVcalibration.txt index 0c2f376b8f..43672feda2 100644 --- a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.PAVcalibration.txt +++ b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.PAVcalibration.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 4.309107 0.9756098 0.035623875334191583 1 21 1 6.161626 1 0 1 22 0 -3.35177135 1E-15 1.4415419267167138E-15 0 -23 1 +23 1 ? ? ? 0 24 0 -3.57612 1E-15 1.4415419267167138E-15 0 25 1 1.68778992 0.8125 0.29956028185890782 1 26 0 -3.21128821 1E-15 1.4415419267167138E-15 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.20465565 0.0625 0.093109404391481479 0 38 1 4.3035 0.9756098 0.035623875334191583 1 39 1 2.444232 0.84 0.25153881203904033 1 -40 0 +40 0 ? ? ? 0 41 1 2.349327 0.84 0.25153881203904033 1 42 1 6.69547653 1 0 1 43 1 0.1725626 0.7777778 0.36257005481575838 1 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -3.11112714 1E-15 1.4415419267167138E-15 0 137 0 -3.60871148 1E-15 1.4415419267167138E-15 0 138 0 -2.89319158 1E-15 1.4415419267167138E-15 0 -139 0 +139 0 ? ? ? 0 140 0 -3.60871148 1E-15 1.4415419267167138E-15 0 141 0 -3.8493557 1E-15 1.4415419267167138E-15 0 142 1 2.848053 0.84 0.25153881203904033 1 143 0 -2.73937 1E-15 1.4415419267167138E-15 0 144 0 -3.59241557 1E-15 1.4415419267167138E-15 0 -145 0 +145 0 ? ? ? 
0 146 1 1.31008816 0.8125 0.29956028185890782 1 147 0 -3.43885779 1E-15 1.4415419267167138E-15 0 148 0 -0.152019978 0.4 0.73696560849809378 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.618639 0.84 0.25153881203904033 1 156 0 -3.41809654 1E-15 1.4415419267167138E-15 0 157 0 -3.33547568 1E-15 1.4415419267167138E-15 0 -158 0 +158 0 ? ? ? 0 159 1 9.536341 1 0 1 160 1 6.860572 1 0 1 161 0 -2.5829134 1E-15 1.4415419267167138E-15 0 162 0 -3.09483147 1E-15 1.4415419267167138E-15 0 163 0 -2.50633955 1E-15 1.4415419267167138E-15 0 -164 0 +164 0 ? ? ? 0 165 0 -2.245256 1E-15 1.4415419267167138E-15 0 166 1 6.456311 1 0 1 167 1 4.319108 0.9756098 0.035623875334191583 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.8359299 0.7777778 2.1699250874336404 1 233 1 3.89948845 0.9411765 0.087462835875881578 1 234 0 -1.06992626 0.0625 0.093109404391481479 0 -235 0 +235 0 ? ? ? 0 236 1 8.428113 1 0 1 237 1 5.33226967 1 0 1 238 1 9.183852 1 0 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 8.645902 1 0 1 247 1 2.597087 0.84 0.25153881203904033 1 248 0 -1.497818 0.0625 0.093109404391481479 0 -249 0 +249 0 ? ? ? 0 250 0 -3.67503667 1E-15 1.4415419267167138E-15 0 251 1 5.0781126 1 0 1 252 0 3.308917 0.84 2.643855953298599 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 2.38956165 0.84 0.25153881203904033 1 273 1 0.3306446 0.7777778 0.36257005481575838 1 274 0 -2.82355762 1E-15 1.4415419267167138E-15 0 -275 0 +275 0 ? ? ? 0 276 0 -3.35177135 1E-15 1.4415419267167138E-15 0 277 0 -3.83305979 1E-15 1.4415419267167138E-15 0 278 0 -3.57612 1E-15 1.4415419267167138E-15 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 6.52714539 1 0 1 290 0 -4.08999968 1E-15 1.4415419267167138E-15 0 291 0 -3.57612 1E-15 1.4415419267167138E-15 0 -292 1 +292 1 ? ? ? 0 293 1 3.63595963 0.9411765 0.087462835875881578 1 -294 0 +294 0 ? ? ? 
0 295 1 5.25647163 1 0 1 296 0 1.97336864 0.8125 2.4150374992788439 1 -297 0 +297 0 ? ? ? 0 298 0 -2.03882861 1E-15 1.4415419267167138E-15 0 299 1 4.99616432 1 0 1 300 1 5.63767242 1 0 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 2.14314938 0.84 0.25153881203904033 1 313 0 -4.08999968 1E-15 1.4415419267167138E-15 0 314 0 -3.88960457 1E-15 1.4415419267167138E-15 0 -315 0 +315 0 ? ? ? 0 316 1 3.149722 0.84 0.25153881203904033 1 317 1 7.02619457 1 0 1 318 0 -3.406486 1E-15 1.4415419267167138E-15 0 319 0 1.82393885 0.8125 2.4150374992788439 1 320 1 5.15885925 1 0 1 -321 0 +321 0 ? ? ? 0 322 0 -3.09483147 1E-15 1.4415419267167138E-15 0 323 1 3.22143555 0.84 0.25153881203904033 1 324 0 -3.57612 1E-15 1.4415419267167138E-15 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -2.29045367 1E-15 1.4415419267167138E-15 0 409 0 -3.13383579 1E-15 1.4415419267167138E-15 0 410 0 -3.83305979 1E-15 1.4415419267167138E-15 0 -411 0 +411 0 ? ? ? 0 412 1 6.52325153 1 0 1 413 0 -2.39560747 1E-15 1.4415419267167138E-15 0 414 1 5.304202 1 0 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -3.64896035 1E-15 1.4415419267167138E-15 0 615 0 -2.675256 1E-15 1.4415419267167138E-15 0 616 0 -3.35177135 1E-15 1.4415419267167138E-15 0 -617 0 +617 0 ? ? ? 
0 618 0 -3.11112714 1E-15 1.4415419267167138E-15 0 619 0 -2.870483 1E-15 1.4415419267167138E-15 0 620 0 -3.35177135 1E-15 1.4415419267167138E-15 0 diff --git a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.calibrateRandom.txt b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.calibrateRandom.txt index 69424d1f96..b1c0d65d4f 100644 --- a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.calibrateRandom.txt +++ b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.calibrateRandom.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 4.309107 0.982883155 0.024908174987257281 1 21 1 6.161626 0.997486651 0.0036305605660221312 1 22 0 -3.35177135 0.0190011337 0.027676625763894382 0 -23 1 +23 1 ? ? ? 0 24 0 -3.57612 0.0150948567 0.021943310344814629 0 25 1 1.68778992 0.7883411 0.34310809466132391 1 26 0 -3.21128821 0.02193545 0.031998411900987428 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.20465565 0.154014841 0.24129573984468006 0 38 1 4.3035 0.98278445 0.025053063812451211 1 39 1 2.444232 0.8913242 0.16597778265246793 1 -40 0 +40 0 ? ? ? 0 41 1 2.349327 0.881352544 0.18220887767190111 1 42 1 6.69547653 0.99855864 0.002080942285094773 1 43 1 0.1725626 0.43382585 1.2048120730945795 1 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -3.11112714 0.0242936034 0.035481008429150813 0 137 0 -3.60871148 0.0145974671 0.021214914966477344 0 138 0 -2.89319158 0.0303093083 0.044403459692558656 0 -139 0 +139 0 ? ? ? 0 140 0 -3.60871148 0.0145974671 0.021214914966477344 0 141 0 -3.8493557 0.01139268 0.016530506138550251 0 142 1 2.848053 0.9259271 0.11102947905485054 1 143 0 -2.73937 0.0353999846 0.051997261707361145 0 144 0 -3.59241557 0.0148441121 0.021576064827714625 0 -145 0 +145 0 ? ? ? 
0 146 1 1.31008816 0.7152061 0.48356908051152397 1 147 0 -3.43885779 0.01737916 0.025293256640113757 0 148 0 -0.152019978 0.353206038 0.62862188502856431 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.618639 0.9077395 0.139649725798073 1 156 0 -3.41809654 0.0177530367 0.025842292400220613 0 157 0 -3.33547568 0.01932072 0.028146699492868216 0 -158 0 +158 0 ? ? ? 0 159 1 9.536341 0.999925554 0.0001074071635410925 1 160 1 6.860572 0.998786449 0.0017518465781146784 1 161 0 -2.5829134 0.0414183028 0.061026700952795158 0 162 0 -3.09483147 0.0246999636 0.036081984337152172 0 163 0 -2.50633955 0.04470976 0.065988970965053517 0 -164 0 +164 0 ? ? ? 0 165 0 -2.245256 0.0579013154 0.086049905177797967 0 166 1 6.456311 0.998150766 0.0026703506849678977 1 167 1 4.319108 0.983057857 0.024651767929287045 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.8359299 0.6049175 1.3397742465159499 1 233 1 3.89948845 0.9739913 0.038019246409892023 1 234 0 -1.06992626 0.17323719 0.27445460033191865 0 -235 0 +235 0 ? ? ? 0 236 1 8.428113 0.99976337 0.0003414259594434771 1 237 1 5.33226967 0.994048536 0.0086117997569089607 1 238 1 9.183852 0.9998925 0.00015513669190214178 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 8.645902 0.9998115 0.00027201620468366962 1 247 1 2.597087 0.9058386 0.14267406349976108 1 248 0 -1.497818 0.118221387 0.1815116096306757 0 -249 0 +249 0 ? ? ? 0 250 0 -3.67503667 0.0136346063 0.019805910679898876 0 251 1 5.0781126 0.9922549 0.011217294571842519 1 252 0 3.308917 0.952875 4.4073641261174119 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 2.38956165 0.8856733 0.17515349285073656 1 273 1 0.3306446 0.474698573 1.0749163809587898 1 274 0 -2.82355762 0.0325194858 0.047695490325124906 0 -275 0 +275 0 ? ? ? 
0 276 0 -3.35177135 0.0190011337 0.027676625763894382 0 277 0 -3.83305979 0.0115858121 0.016812375308683684 0 278 0 -3.57612 0.0150948567 0.021943310344814629 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 6.52714539 0.9982823 0.0024802287545360943 1 290 0 -4.08999968 0.008885143 0.012875839136462838 0 291 0 -3.57612 0.0150948567 0.021943310344814629 0 -292 1 +292 1 ? ? ? 0 293 1 3.63595963 0.9660381 0.049847993866089353 1 -294 0 +294 0 ? ? ? 0 295 1 5.25647163 0.9935618 0.0093183820918222807 1 296 0 1.97336864 0.8338234 2.589210701355853 1 -297 0 +297 0 ? ? ? 0 298 0 -2.03882861 0.0708337 0.10599126176574966 0 299 1 4.99616432 0.9915693 0.012214518276517204 1 300 1 5.63767242 0.995665669 0.0062667080268442431 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 2.14314938 0.856945038 0.22272541817603111 1 313 0 -4.08999968 0.008885143 0.012875839136462838 0 314 0 -3.88960457 0.0109292 0.015854298528928425 0 -315 0 +315 0 ? ? ? 0 316 1 3.149722 0.9448283 0.081875960094708669 1 317 1 7.02619457 0.9989789 0.0014738699989607148 1 318 0 -3.406486 0.0179655552 0.026154467003641695 0 319 0 1.82393885 0.8110785 2.4041411141982105 1 320 1 5.15885925 0.992876351 0.01031403381058839 1 -321 0 +321 0 ? ? ? 0 322 0 -3.09483147 0.0246999636 0.036081984337152172 0 323 1 3.22143555 0.948601961 0.076125243844936963 1 324 0 -3.57612 0.0150948567 0.021943310344814629 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -2.29045367 0.05538148 0.082196274856991033 0 409 0 -3.13383579 0.0237381756 0.034659977772739645 0 410 0 -3.83305979 0.0115858121 0.016812375308683684 0 -411 0 +411 0 ? ? ? 
0 412 1 6.52325153 0.99827534 0.0024903070861691909 1 413 0 -2.39560747 0.0499131419 0.073868682437534181 0 414 1 5.304202 0.9938727 0.0088670155127870496 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -3.64896035 0.0140054561 0.020348431589167357 0 615 0 -2.675256 0.0377570055 0.055526832252899339 0 616 0 -3.35177135 0.0190011337 0.027676625763894382 0 -617 0 +617 0 ? ? ? 0 618 0 -3.11112714 0.0242936034 0.035481008429150813 0 619 0 -2.870483 0.0310136 0.045451678527033197 0 620 0 -3.35177135 0.0190011337 0.027676625763894382 0 diff --git a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.nocalibration.txt b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.nocalibration.txt index 0f08e7f566..55861f250c 100644 --- a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.nocalibration.txt +++ b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.nocalibration.txt @@ -22,7 +22,7 @@ Instance Label Score Assigned 20 1 4.309107 1 21 1 6.161626 1 22 0 -3.35177135 0 -23 1 +23 1 ? 0 24 0 -3.57612 0 25 1 1.68778992 1 26 0 -3.21128821 0 @@ -39,7 +39,7 @@ Instance Label Score Assigned 37 0 -1.20465565 0 38 1 4.3035 1 39 1 2.444232 1 -40 0 +40 0 ? 0 41 1 2.349327 1 42 1 6.69547653 1 43 1 0.1725626 1 @@ -138,13 +138,13 @@ Instance Label Score Assigned 136 0 -3.11112714 0 137 0 -3.60871148 0 138 0 -2.89319158 0 -139 0 +139 0 ? 0 140 0 -3.60871148 0 141 0 -3.8493557 0 142 1 2.848053 1 143 0 -2.73937 0 144 0 -3.59241557 0 -145 0 +145 0 ? 0 146 1 1.31008816 1 147 0 -3.43885779 0 148 0 -0.152019978 0 @@ -157,13 +157,13 @@ Instance Label Score Assigned 155 1 2.618639 1 156 0 -3.41809654 0 157 0 -3.33547568 0 -158 0 +158 0 ? 0 159 1 9.536341 1 160 1 6.860572 1 161 0 -2.5829134 0 162 0 -3.09483147 0 163 0 -2.50633955 0 -164 0 +164 0 ? 
0 165 0 -2.245256 0 166 1 6.456311 1 167 1 4.319108 1 @@ -234,7 +234,7 @@ Instance Label Score Assigned 232 0 0.8359299 1 233 1 3.89948845 1 234 0 -1.06992626 0 -235 0 +235 0 ? 0 236 1 8.428113 1 237 1 5.33226967 1 238 1 9.183852 1 @@ -248,7 +248,7 @@ Instance Label Score Assigned 246 1 8.645902 1 247 1 2.597087 1 248 0 -1.497818 0 -249 0 +249 0 ? 0 250 0 -3.67503667 0 251 1 5.0781126 1 252 0 3.308917 1 @@ -274,7 +274,7 @@ Instance Label Score Assigned 272 1 2.38956165 1 273 1 0.3306446 1 274 0 -2.82355762 0 -275 0 +275 0 ? 0 276 0 -3.35177135 0 277 0 -3.83305979 0 278 0 -3.57612 0 @@ -291,12 +291,12 @@ Instance Label Score Assigned 289 1 6.52714539 1 290 0 -4.08999968 0 291 0 -3.57612 0 -292 1 +292 1 ? 0 293 1 3.63595963 1 -294 0 +294 0 ? 0 295 1 5.25647163 1 296 0 1.97336864 1 -297 0 +297 0 ? 0 298 0 -2.03882861 0 299 1 4.99616432 1 300 1 5.63767242 1 @@ -314,13 +314,13 @@ Instance Label Score Assigned 312 1 2.14314938 1 313 0 -4.08999968 0 314 0 -3.88960457 0 -315 0 +315 0 ? 0 316 1 3.149722 1 317 1 7.02619457 1 318 0 -3.406486 0 319 0 1.82393885 1 320 1 5.15885925 1 -321 0 +321 0 ? 0 322 0 -3.09483147 0 323 1 3.22143555 1 324 0 -3.57612 0 @@ -410,7 +410,7 @@ Instance Label Score Assigned 408 0 -2.29045367 0 409 0 -3.13383579 0 410 0 -3.83305979 0 -411 0 +411 0 ? 0 412 1 6.52325153 1 413 0 -2.39560747 0 414 1 5.304202 1 @@ -616,7 +616,7 @@ Instance Label Score Assigned 614 0 -3.64896035 0 615 0 -2.675256 0 616 0 -3.35177135 0 -617 0 +617 0 ? 
0 618 0 -3.11112714 0 619 0 -2.870483 0 620 0 -3.35177135 0 diff --git a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.txt index f1da00a926..e07ceec647 100644 --- a/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 2.27614474 0.99046284 0.01382524609823586 1 21 1 2.71551442 0.9964734 0.0050968413754289565 1 22 0 -1.91610467 0.007339344 0.010627482294138626 0 -23 1 +23 1 ? ? ? 0 24 0 -2.12083673 0.00461633364 0.0066753814217522627 0 25 1 0.579404354 0.6851912 0.54542144241146051 1 26 0 -1.88108075 0.007944072 0.011506638331166432 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -0.9763708 0.0591678135 0.087990678318914578 0 38 1 1.97923112 0.9814142 0.027065950749429996 1 39 1 0.8320954 0.794680536 0.33155308613045453 1 -40 0 +40 0 ? ? ? 0 41 1 0.88030076 0.812019646 0.30041346219868831 1 42 1 3.29876113 0.9990636 0.0013515566126819322 1 43 1 0.21289444 0.485705882 1.0418451361573347 1 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -1.76776826 0.010259659 0.014878012048023576 0 137 0 -2.00804472 0.00596073642 0.0086252569347725905 0 138 0 -1.64871967 0.0134130223 0.019481850015569589 0 -139 0 +139 0 ? ? ? 0 140 0 -2.00804472 0.00596073642 0.0086252569347725905 0 141 0 -2.15638113 0.00425880356 0.0061572754121802442 0 142 1 1.42587113 0.93738085 0.093292772480968233 1 143 0 -1.57122457 0.0159614533 0.023213265078036949 0 144 0 -2.06444073 0.005245867 0.007588107121760313 0 -145 0 +145 0 ? ? ? 
0 146 1 0.5954063 0.693000734 0.52907121478369279 1 147 0 -1.97373641 0.00644216966 0.0093241524569279595 0 148 0 -0.5180371 0.15157719 0.23714468564136862 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 1.38289 0.931381464 0.10255592440494538 1 156 0 -2.00303721 0.00602870947 0.0087239127220832425 0 157 0 -1.97250068 0.00646021264 0.0093503519898618208 0 -158 0 +158 0 ? ? ? 0 159 1 4.52930737 0.9999432 8.1952060728897203E-05 1 160 1 3.334266 0.9991363 0.0012465526889135054 1 161 0 -1.5841018 0.015507183 0.022547414886151074 0 162 0 -1.82416427 0.009033906 0.01309239897866022 0 163 0 -1.37575865 0.0246935654 0.03607251996647886 0 -164 0 +164 0 ? ? ? 0 165 0 -1.40072858 0.0233600326 0.034101275741716536 0 166 1 3.14331079 0.9986662 0.0019255122730899305 1 167 1 2.61804938 0.995600462 0.0063611951559849299 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.262052536 0.5136946 1.0400654369740645 1 233 1 2.16407657 0.987723053 0.017821513393369265 1 234 0 -1.00943446 0.0551115535 0.081784080061402753 0 -235 0 +235 0 ? ? ? 0 236 1 4.29103756 0.999902248 0.00014103266690546063 1 237 1 2.42226958 0.9931447 0.0099241759578403005 1 238 1 4.649124 0.9999568 6.2345058017421014E-05 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 4.143937 0.9998633 0.00019719158494834585 1 247 1 1.19717526 0.898898065 0.15377057127992425 1 248 0 -1.217197 0.03506021 0.051489167959956395 0 -249 0 +249 0 ? ? ? 0 250 0 -2.09497738 0.004895074 0.0070794400974987601 0 251 1 2.700413 0.9963504 0.0052748803033137396 1 252 0 1.58717227 0.955785036 4.4993214783146023 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 0.980280161 0.8443483 0.2440898311411345 1 273 1 0.00625777245 0.3710024 1.4304995524008379 1 274 0 -1.73243809 0.0111099789 0.01611801356385726 0 -275 0 +275 0 ? ? ? 
0 276 0 -1.91610467 0.007339344 0.010627482294138626 0 277 0 -2.21277714 0.00374727719 0.0054163328257276827 0 278 0 -2.12083673 0.00461633364 0.0066753814217522627 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 3.0956676 0.9985136 0.002146047033326764 1 290 0 -2.30471754 0.00304132653 0.0043943924919565489 0 291 0 -2.12083673 0.00461633364 0.0066753814217522627 0 -292 1 +292 1 ? ? ? 0 293 1 1.9506762 0.980189741 0.028867048302480647 1 -294 0 +294 0 ? ? ? 0 295 1 2.57708764 0.995172262 0.0069818201865888847 1 296 0 0.6443205 0.7161847 1.8169756066019795 1 -297 0 +297 0 ? ? ? 0 298 0 -1.00866961 0.0552023575 0.081922729978655878 0 299 1 2.32091022 0.9913795 0.012490669010981763 1 300 1 2.40809751 0.992921352 0.010248646031556275 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 1.19807649 0.899084449 0.15347146394878203 1 313 0 -2.30471754 0.00304132653 0.0043943924919565489 0 314 0 -2.26844478 0.00330244377 0.0047723037303054865 0 -315 0 +315 0 ? ? ? 0 316 1 1.47730184 0.943915665 0.083270128208594502 1 317 1 3.18132329 0.998776734 0.0017658802629188005 1 318 0 -1.9538343 0.00673895236 0.0097551599741469968 0 319 0 0.68000555 0.732414246 1.9019267823886503 1 320 1 2.448116 0.993534148 0.009358541174187586 1 -321 0 +321 0 ? ? ? 0 322 0 -1.82416427 0.009033906 0.01309239897866022 0 323 1 1.739836 0.9683619 0.046381755881038968 1 324 0 -2.12083673 0.00461633364 0.0066753814217522627 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -1.41561711 0.022598628 0.032976965091470092 0 409 0 -1.79705584 0.009603847 0.013922383925987339 0 410 0 -2.21277714 0.00374727719 0.0054163328257276827 0 -411 0 +411 0 ? ? ? 
0 412 1 3.177976 0.9987674 0.0017793974994358322 1 413 0 -1.40844309 0.0229624342 0.033514061939899156 0 414 1 2.38908362 0.992610335 0.010700618410080708 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -2.1201086 0.00462396163 0.0066864373761273176 0 615 0 -1.529671 0.01751844 0.025497765300806271 0 616 0 -1.91610467 0.007339344 0.010627482294138626 0 -617 0 +617 0 ? ? ? 0 618 0 -1.76776826 0.010259659 0.014878012048023576 0 619 0 -1.61943209 0.0143251875 0.020816334525491278 0 620 0 -1.91610467 0.007339344 0.010627482294138626 0 diff --git a/test/BaselineOutput/SingleDebug/Command/CommandCrossValidationKeyLabelWithFloatKeyValues-out.txt b/test/BaselineOutput/SingleDebug/Command/CommandCrossValidationKeyLabelWithFloatKeyValues-out.txt index 3608e165c9..338a27b79c 100644 --- a/test/BaselineOutput/SingleDebug/Command/CommandCrossValidationKeyLabelWithFloatKeyValues-out.txt +++ b/test/BaselineOutput/SingleDebug/Command/CommandCrossValidationKeyLabelWithFloatKeyValues-out.txt @@ -38,6 +38,8 @@ DCG@2: 0.000000 (0.0000) DCG@3: 0.000000 (0.0000) --------------------------------------- +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. 
The missing key value will be mapped to the default value of 'Text' Physical memory usage(MB): %Number% Virtual memory usage(MB): %Number% %DateTime% Time elapsed(s): %Number% diff --git a/test/BaselineOutput/SingleDebug/Command/Datatypes-1-out.txt b/test/BaselineOutput/SingleDebug/Command/Datatypes-1-out.txt new file mode 100644 index 0000000000..fe04f014c2 --- /dev/null +++ b/test/BaselineOutput/SingleDebug/Command/Datatypes-1-out.txt @@ -0,0 +1 @@ +Wrote 5 rows across 9 columns in %Time% diff --git a/test/BaselineOutput/SingleDebug/Command/DataTypes-out.txt b/test/BaselineOutput/SingleDebug/Command/Datatypes-2-out.txt similarity index 100% rename from test/BaselineOutput/SingleDebug/Command/DataTypes-out.txt rename to test/BaselineOutput/SingleDebug/Command/Datatypes-2-out.txt diff --git a/test/BaselineOutput/SingleDebug/Command/Datatypes-datatypes.txt b/test/BaselineOutput/SingleDebug/Command/Datatypes-datatypes.txt index e7d128e400..aaf1a3cb2e 100644 --- a/test/BaselineOutput/SingleDebug/Command/Datatypes-datatypes.txt +++ b/test/BaselineOutput/SingleDebug/Command/Datatypes-datatypes.txt @@ -14,6 +14,6 @@ bl i1 i2 i4 i8 ts dto dt tx 0 127 32767 2147483647 9223372036854775807 "2.00:00:00" "2008-11-30T00:00:00.0000000+00:00" "2013-08-05T00:00:00.0000000" foo 1 -127 -32767 -2147483647 -9223372036854775807 "7.00:00:00" "2008-11-30T00:00:00.0000000+00:00" "2013-08-05T00:00:00.0000000" xyz - "7.00:00:00" "2008-11-30T00:00:00.0000000+00:00" "2013-08-05T00:00:00.0000000" +0 -128 -32768 -2147483648 -9223372036854775808 "7.00:00:00" "2008-11-30T00:00:00.0000000+00:00" "2013-08-05T00:00:00.0000000" "" 9 0:0 - +0 -128 -32768 -2147483648 -9223372036854775808 "00:00:00" "0001-01-01T00:00:00.0000000+00:00" "0001-01-01T00:00:00.0000000" "" diff --git a/test/BaselineOutput/SingleRelease/Command/DataTypes-out.txt b/test/BaselineOutput/SingleDebug/Command/Datatypes-out.txt similarity index 100% rename from test/BaselineOutput/SingleRelease/Command/DataTypes-out.txt rename to 
test/BaselineOutput/SingleDebug/Command/Datatypes-out.txt diff --git a/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.key-out.txt b/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.key-out.txt index 193a84b3cd..f6c942f824 100644 --- a/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.key-out.txt +++ b/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.key-out.txt @@ -46,6 +46,12 @@ Log-loss: 0.253074 (0.0597) Log-loss reduction: 76.713844 (5.4729) --------------------------------------- +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' Physical memory usage(MB): %Number% Virtual memory usage(MB): %Number% %DateTime% Time elapsed(s): %Number% diff --git a/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt b/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt index e6f1c68e64..28327f69a4 100644 --- a/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt +++ b/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt @@ -50,6 +50,10 @@ Log-loss: 0.253074 (0.0597) Log-loss reduction: 76.713839 (5.4729) --------------------------------------- +Warning: There is no NA value for type 'Text'. 
The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' Physical memory usage(MB): %Number% Virtual memory usage(MB): %Number% %DateTime% Time elapsed(s): %Number% diff --git a/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-CV-breast-cancer.PAVcalibration.txt b/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-CV-breast-cancer.PAVcalibration.txt index b337fd78da..b113f448bc 100644 --- a/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-CV-breast-cancer.PAVcalibration.txt +++ b/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-CV-breast-cancer.PAVcalibration.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 1.25826859 1 0 1 35 0 -1.41764426 1E-15 1.4415419267167138E-15 0 37 0 -0.835641861 1E-15 1.4415419267167138E-15 0 -40 0 +40 0 ? ? ? 0 41 1 0.227130175 0.8666667 0.20645086423799175 1 44 1 1.61423349 1 0 1 45 0 -1.47288513 1E-15 1.4415419267167138E-15 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -1.19960022 1E-15 1.4415419267167138E-15 0 141 0 -1.47188914 1E-15 1.4415419267167138E-15 0 144 0 -1.41764426 1E-15 1.4415419267167138E-15 0 -145 0 +145 0 ? ? ? 0 147 0 -1.334388 1E-15 1.4415419267167138E-15 0 150 0 -1.48176765 1E-15 1.4415419267167138E-15 0 151 1 0.7797704 1 0 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -1.54539037 1E-15 1.4415419267167138E-15 0 156 0 -1.35846376 1E-15 1.4415419267167138E-15 0 161 0 -1.1599288 1E-15 1.4415419267167138E-15 0 -164 0 +164 0 ? ? ? 
0 167 1 1.07981849 1 0 1 169 0 -1.5413084 1E-15 1.4415419267167138E-15 0 171 0 -1.43690062 1E-15 1.4415419267167138E-15 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 2.368261 1 0 1 247 1 0.4837153 0.8666667 0.20645086423799175 1 248 0 -0.934056163 1E-15 1.4415419267167138E-15 0 -249 0 +249 0 ? ? ? 0 250 0 -1.41270852 1E-15 1.4415419267167138E-15 0 252 0 0.706041336 0.8666667 2.9068906815998465 1 254 1 1.600352 1 0 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -1.43690062 1E-15 1.4415419267167138E-15 0 271 0 -1.1428957 1E-15 1.4415419267167138E-15 0 272 1 0.435359716 0.8666667 0.20645086423799175 1 -275 0 +275 0 ? ? ? 0 276 0 -1.34414315 1E-15 1.4415419267167138E-15 0 277 0 -1.49114561 1E-15 1.4415419267167138E-15 0 278 0 -1.43690062 1E-15 1.4415419267167138E-15 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -1.43690062 1E-15 1.4415419267167138E-15 0 293 1 0.9331081 1 0 1 296 0 0.221757889 0.8666667 2.9068906815998465 1 -297 0 +297 0 ? ? ? 0 299 1 1.13769388 1 0 1 300 1 1.24829745 1 0 1 301 0 -1.43690062 1E-15 1.4415419267167138E-15 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 0.7455783 0.9151356 0.12794252993534275 1 317 1 1.81665158 1 0 1 319 0 0.285441637 0.8666667 2.9068906815998465 1 -321 0 +321 0 ? ? ? 0 323 1 0.8990345 1 0 1 327 0 -1.49114561 1E-15 1.4415419267167138E-15 0 328 1 0.8467562 1 0 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 2.95104 1 0 1 613 0 -1.29436374 1E-15 1.4415419267167138E-15 0 614 0 -1.4625113 1E-15 1.4415419267167138E-15 0 -617 0 +617 0 ? ? ? 
0 618 0 -1.2706418 1E-15 1.4415419267167138E-15 0 619 0 -1.19714069 1E-15 1.4415419267167138E-15 0 621 0 -0.3866408 0.5799383 1.2513268180432666 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -1.13828516 1E-15 1.4415419267167138E-15 0 19 0 -0.9969288 1E-15 1.4415419267167138E-15 0 22 0 -1.259604 1E-15 1.4415419267167138E-15 0 -23 1 +23 1 ? ? ? 0 24 0 -1.35031986 1E-15 1.4415419267167138E-15 0 26 0 -1.22985053 1E-15 1.4415419267167138E-15 0 27 0 -1.11824775 1E-15 1.4415419267167138E-15 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -1.22317672 1E-15 1.4415419267167138E-15 0 135 0 -0.8295188 1E-15 1.4415419267167138E-15 0 136 0 -1.18892586 1E-15 1.4415419267167138E-15 0 -139 0 +139 0 ? ? ? 0 140 0 -1.3102448 1E-15 1.4415419267167138E-15 0 142 1 0.6019325 0.875 0.19264507794239591 1 143 0 -1.00445139 1E-15 1.4415419267167138E-15 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -1.01599169 1E-15 1.4415419267167138E-15 0 155 1 0.7134235 0.875 0.19264507794239591 1 157 0 -1.27964163 1E-15 1.4415419267167138E-15 0 -158 0 +158 0 ? ? ? 0 159 1 2.42447567 1 0 1 160 1 1.81060028 1 0 1 162 0 -1.20896339 1E-15 1.4415419267167138E-15 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 1.72919893 1 0 1 232 0 -0.038654685 0.6923077 1.7004398041324202 0 234 0 -0.661043048 1E-15 1.4415419267167138E-15 0 -235 0 +235 0 ? ? ? 0 236 1 2.13174057 1 0 1 238 1 2.40485334 1 0 1 243 0 -0.8018886 1E-15 1.4415419267167138E-15 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 2.9424572 1 0 1 287 0 -1.22317672 1E-15 1.4415419267167138E-15 0 289 1 1.71858239 1 0 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 
0 295 1 1.42958808 1 0 1 298 0 -0.7813908 1E-15 1.4415419267167138E-15 0 302 1 2.925787 1 0 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -1.35031986 1E-15 1.4415419267167138E-15 0 310 0 -1.293855 1E-15 1.4415419267167138E-15 0 313 0 -1.45160127 1E-15 1.4415419267167138E-15 0 -315 0 +315 0 ? ? ? 0 318 0 -1.24103785 1E-15 1.4415419267167138E-15 0 320 1 1.18153763 1 0 1 322 0 -1.20896339 1E-15 1.4415419267167138E-15 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -1.40096045 1E-15 1.4415419267167138E-15 0 408 0 -0.928058 1E-15 1.4415419267167138E-15 0 410 0 -1.40096045 1E-15 1.4415419267167138E-15 0 -411 0 +411 0 ? ? ? 0 412 1 1.731832 1 0 1 417 0 -1.40096045 1E-15 1.4415419267167138E-15 0 420 0 -0.7677182 1E-15 1.4415419267167138E-15 0 diff --git a/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-CV-breast-cancer.nocalibration.txt b/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-CV-breast-cancer.nocalibration.txt index fed143de3d..000020d071 100644 --- a/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-CV-breast-cancer.nocalibration.txt +++ b/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-CV-breast-cancer.nocalibration.txt @@ -14,7 +14,7 @@ Instance Label Score Assigned 32 1 1.25826859 1 35 0 -1.41764426 0 37 0 -0.835641861 0 -40 0 +40 0 ? 0 41 1 0.227130175 1 44 1 1.61423349 1 45 0 -1.47288513 0 @@ -76,7 +76,7 @@ Instance Label Score Assigned 138 0 -1.19960022 0 141 0 -1.47188914 0 144 0 -1.41764426 0 -145 0 +145 0 ? 0 147 0 -1.334388 0 150 0 -1.48176765 0 151 1 0.7797704 1 @@ -84,7 +84,7 @@ Instance Label Score Assigned 154 0 -1.54539037 0 156 0 -1.35846376 0 161 0 -1.1599288 0 -164 0 +164 0 ? 0 167 1 1.07981849 1 169 0 -1.5413084 0 171 0 -1.43690062 0 @@ -130,7 +130,7 @@ Instance Label Score Assigned 246 1 2.368261 1 247 1 0.4837153 1 248 0 -0.934056163 0 -249 0 +249 0 ? 
0 250 0 -1.41270852 0 252 0 0.706041336 1 254 1 1.600352 1 @@ -144,7 +144,7 @@ Instance Label Score Assigned 269 0 -1.43690062 0 271 0 -1.1428957 0 272 1 0.435359716 1 -275 0 +275 0 ? 0 276 0 -1.34414315 0 277 0 -1.49114561 0 278 0 -1.43690062 0 @@ -158,7 +158,7 @@ Instance Label Score Assigned 291 0 -1.43690062 0 293 1 0.9331081 1 296 0 0.221757889 1 -297 0 +297 0 ? 0 299 1 1.13769388 1 300 1 1.24829745 1 301 0 -1.43690062 0 @@ -172,7 +172,7 @@ Instance Label Score Assigned 316 1 0.7455783 1 317 1 1.81665158 1 319 0 0.285441637 1 -321 0 +321 0 ? 0 323 1 0.8990345 1 327 0 -1.49114561 0 328 1 0.8467562 1 @@ -318,7 +318,7 @@ Instance Label Score Assigned 612 1 2.95104 1 613 0 -1.29436374 0 614 0 -1.4625113 0 -617 0 +617 0 ? 0 618 0 -1.2706418 0 619 0 -1.19714069 0 621 0 -0.3866408 0 @@ -375,7 +375,7 @@ Instance Label Score Assigned 17 0 -1.13828516 0 19 0 -0.9969288 0 22 0 -1.259604 0 -23 1 +23 1 ? 0 24 0 -1.35031986 0 26 0 -1.22985053 0 27 0 -1.11824775 0 @@ -425,7 +425,7 @@ Instance Label Score Assigned 134 0 -1.22317672 0 135 0 -0.8295188 0 136 0 -1.18892586 0 -139 0 +139 0 ? 0 140 0 -1.3102448 0 142 1 0.6019325 1 143 0 -1.00445139 0 @@ -435,7 +435,7 @@ Instance Label Score Assigned 153 0 -1.01599169 0 155 1 0.7134235 1 157 0 -1.27964163 0 -158 0 +158 0 ? 0 159 1 2.42447567 1 160 1 1.81060028 1 162 0 -1.20896339 0 @@ -474,7 +474,7 @@ Instance Label Score Assigned 231 1 1.72919893 1 232 0 -0.038654685 0 234 0 -0.661043048 0 -235 0 +235 0 ? 0 236 1 2.13174057 1 238 1 2.40485334 1 243 0 -0.8018886 0 @@ -496,8 +496,8 @@ Instance Label Score Assigned 286 1 2.9424572 1 287 0 -1.22317672 0 289 1 1.71858239 1 -292 1 -294 0 +292 1 ? 0 +294 0 ? 0 295 1 1.42958808 1 298 0 -0.7813908 0 302 1 2.925787 1 @@ -506,7 +506,7 @@ Instance Label Score Assigned 307 0 -1.35031986 0 310 0 -1.293855 0 313 0 -1.45160127 0 -315 0 +315 0 ? 
0 318 0 -1.24103785 0 320 1 1.18153763 1 322 0 -1.20896339 0 @@ -551,7 +551,7 @@ Instance Label Score Assigned 407 0 -1.40096045 0 408 0 -0.928058 0 410 0 -1.40096045 0 -411 0 +411 0 ? 0 412 1 1.731832 1 417 0 -1.40096045 0 420 0 -0.7677182 0 diff --git a/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-TrainTest-breast-cancer.PAVcalibration.txt b/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-TrainTest-breast-cancer.PAVcalibration.txt index 9c673bc10a..905fbd46fd 100644 --- a/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-TrainTest-breast-cancer.PAVcalibration.txt +++ b/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-TrainTest-breast-cancer.PAVcalibration.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 1.133441 1 0 1 21 1 1.46698761 1 0 1 22 0 -1.29266214 1E-15 1.4415419267167138E-15 0 -23 1 +23 1 ? ? ? 0 24 0 -1.38898408 1E-15 1.4415419267167138E-15 0 25 1 0.2072978 0.8125 0.29956028185890782 1 26 0 -1.27053475 1E-15 1.4415419267167138E-15 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -0.7899848 1E-15 1.4415419267167138E-15 0 38 1 1.07285547 0.955555558 0.065588337627980248 1 39 1 0.444084644 0.826086938 0.27563447429444238 1 -40 0 +40 0 ? ? ? 0 41 1 0.278235674 0.8125 0.29956028185890782 1 42 1 1.82501435 1 0 1 43 1 0.0648527145 0.8125 0.29956028185890782 1 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -1.21864986 1E-15 1.4415419267167138E-15 0 137 0 -1.34436476 1E-15 1.4415419267167138E-15 0 138 0 -1.15066063 1E-15 1.4415419267167138E-15 0 -139 0 +139 0 ? ? ? 0 140 0 -1.34436476 1E-15 1.4415419267167138E-15 0 141 0 -1.418377 1E-15 1.4415419267167138E-15 0 142 1 0.605928659 0.826086938 0.27563447429444238 1 143 0 -1.04888356 1E-15 1.4415419267167138E-15 0 144 0 -1.36667442 1E-15 1.4415419267167138E-15 0 -145 0 +145 0 ? ? ? 
0 146 1 0.263422 0.8125 0.29956028185890782 1 147 0 -1.28818154 1E-15 1.4415419267167138E-15 0 148 0 -0.38141644 0.333333343 0.5849625222189877 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 0.7075844 0.826086938 0.27563447429444238 1 156 0 -1.30894148 1E-15 1.4415419267167138E-15 0 157 0 -1.3149718 1E-15 1.4415419267167138E-15 0 -158 0 +158 0 ? ? ? 0 159 1 2.41813564 1 0 1 160 1 1.80749393 1 0 1 161 0 -1.10798192 1E-15 1.4415419267167138E-15 0 162 0 -1.24095953 1E-15 1.4415419267167138E-15 0 163 0 -1.14838839 1E-15 1.4415419267167138E-15 0 -164 0 +164 0 ? ? ? 0 165 0 -0.9971055 1E-15 1.4415419267167138E-15 0 166 1 1.77878284 1 0 1 167 1 1.0986836 0.955555558 0.065588337627980248 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 -0.0348134041 0.727272749 1.8744692325712462 0 233 1 1.102045 1 0 1 234 0 -0.709003 0.1 0.15200309583369792 0 -235 0 +235 0 ? ? ? 0 236 1 2.13387632 1 0 1 237 1 1.230866 1 0 1 238 1 2.40226221 1 0 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 2.38478136 1 0 1 247 1 0.5292797 0.826086938 0.27563447429444238 1 248 0 -0.8721206 1E-15 1.4415419267167138E-15 0 -249 0 +249 0 ? ? ? 0 250 0 -1.3606441 1E-15 1.4415419267167138E-15 0 251 1 1.35184932 1 0 1 252 0 0.7461872 0.826086938 2.5235618055722013 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 0.451641083 0.826086938 0.27563447429444238 1 273 1 -0.184997916 0.727272749 0.45943157564163517 0 274 0 -1.1819942 1E-15 1.4415419267167138E-15 0 -275 0 +275 0 ? ? ? 0 276 0 -1.29266214 1E-15 1.4415419267167138E-15 0 277 0 -1.4406867 1E-15 1.4415419267167138E-15 0 278 0 -1.38898408 1E-15 1.4415419267167138E-15 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 1.698751 1 0 1 290 0 -1.49238932 1E-15 1.4415419267167138E-15 0 291 0 -1.38898408 1E-15 1.4415419267167138E-15 0 -292 1 +292 1 ? ? ? 
0 293 1 0.9705 0.955555558 0.065588337627980248 1 -294 0 +294 0 ? ? ? 0 295 1 1.44576406 1 0 1 296 0 0.264410973 0.8125 2.4150374992788439 1 -297 0 +297 0 ? ? ? 0 298 0 -0.785661459 1E-15 1.4415419267167138E-15 0 299 1 1.16638494 1 0 1 300 1 1.27287531 1 0 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.6419792 0.826086938 0.27563447429444238 1 313 0 -1.49238932 1E-15 1.4415419267167138E-15 0 314 0 -1.4823153 1E-15 1.4415419267167138E-15 0 -315 0 +315 0 ? ? ? 0 316 1 0.773257732 0.826086938 0.27563447429444238 1 317 1 1.83582067 1 0 1 318 0 -1.26409817 1E-15 1.4415419267167138E-15 0 319 0 0.303462982 0.8125 2.4150374992788439 1 320 1 1.16069293 1 0 1 -321 0 +321 0 ? ? ? 0 322 0 -1.24095953 1E-15 1.4415419267167138E-15 0 323 1 0.909108639 0.955555558 0.065588337627980248 1 324 0 -1.38898408 1E-15 1.4415419267167138E-15 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -0.9617897 1E-15 1.4415419267167138E-15 0 409 0 -1.22467291 1E-15 1.4415419267167138E-15 0 410 0 -1.4406867 1E-15 1.4415419267167138E-15 0 -411 0 +411 0 ? ? ? 0 412 1 1.77363062 1 0 1 413 0 -1.02494574 1E-15 1.4415419267167138E-15 0 414 1 1.26032782 1 0 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -1.408303 1E-15 1.4415419267167138E-15 0 615 0 -1.0826714 1E-15 1.4415419267167138E-15 0 616 0 -1.29266214 1E-15 1.4415419267167138E-15 0 -617 0 +617 0 ? ? ? 
0 618 0 -1.21864986 1E-15 1.4415419267167138E-15 0 619 0 -1.14463758 1E-15 1.4415419267167138E-15 0 620 0 -1.29266214 1E-15 1.4415419267167138E-15 0 diff --git a/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-TrainTest-breast-cancer.nocalibration.txt b/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-TrainTest-breast-cancer.nocalibration.txt index c5e41e5240..1ee365e6f7 100644 --- a/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-TrainTest-breast-cancer.nocalibration.txt +++ b/test/BaselineOutput/SingleDebug/LinearSVM/LinearSVM-TrainTest-breast-cancer.nocalibration.txt @@ -22,7 +22,7 @@ Instance Label Score Assigned 20 1 1.133441 1 21 1 1.46698761 1 22 0 -1.29266214 0 -23 1 +23 1 ? 0 24 0 -1.38898408 0 25 1 0.2072978 1 26 0 -1.27053475 0 @@ -39,7 +39,7 @@ Instance Label Score Assigned 37 0 -0.7899848 0 38 1 1.07285547 1 39 1 0.444084644 1 -40 0 +40 0 ? 0 41 1 0.278235674 1 42 1 1.82501435 1 43 1 0.0648527145 1 @@ -138,13 +138,13 @@ Instance Label Score Assigned 136 0 -1.21864986 0 137 0 -1.34436476 0 138 0 -1.15066063 0 -139 0 +139 0 ? 0 140 0 -1.34436476 0 141 0 -1.418377 0 142 1 0.605928659 1 143 0 -1.04888356 0 144 0 -1.36667442 0 -145 0 +145 0 ? 0 146 1 0.263422 1 147 0 -1.28818154 0 148 0 -0.38141644 0 @@ -157,13 +157,13 @@ Instance Label Score Assigned 155 1 0.7075844 1 156 0 -1.30894148 0 157 0 -1.3149718 0 -158 0 +158 0 ? 0 159 1 2.41813564 1 160 1 1.80749393 1 161 0 -1.10798192 0 162 0 -1.24095953 0 163 0 -1.14838839 0 -164 0 +164 0 ? 0 165 0 -0.9971055 0 166 1 1.77878284 1 167 1 1.0986836 1 @@ -234,7 +234,7 @@ Instance Label Score Assigned 232 0 -0.0348134041 0 233 1 1.102045 1 234 0 -0.709003 0 -235 0 +235 0 ? 0 236 1 2.13387632 1 237 1 1.230866 1 238 1 2.40226221 1 @@ -248,7 +248,7 @@ Instance Label Score Assigned 246 1 2.38478136 1 247 1 0.5292797 1 248 0 -0.8721206 0 -249 0 +249 0 ? 
0 250 0 -1.3606441 0 251 1 1.35184932 1 252 0 0.7461872 1 @@ -274,7 +274,7 @@ Instance Label Score Assigned 272 1 0.451641083 1 273 1 -0.184997916 0 274 0 -1.1819942 0 -275 0 +275 0 ? 0 276 0 -1.29266214 0 277 0 -1.4406867 0 278 0 -1.38898408 0 @@ -291,12 +291,12 @@ Instance Label Score Assigned 289 1 1.698751 1 290 0 -1.49238932 0 291 0 -1.38898408 0 -292 1 +292 1 ? 0 293 1 0.9705 1 -294 0 +294 0 ? 0 295 1 1.44576406 1 296 0 0.264410973 1 -297 0 +297 0 ? 0 298 0 -0.785661459 0 299 1 1.16638494 1 300 1 1.27287531 1 @@ -314,13 +314,13 @@ Instance Label Score Assigned 312 1 0.6419792 1 313 0 -1.49238932 0 314 0 -1.4823153 0 -315 0 +315 0 ? 0 316 1 0.773257732 1 317 1 1.83582067 1 318 0 -1.26409817 0 319 0 0.303462982 1 320 1 1.16069293 1 -321 0 +321 0 ? 0 322 0 -1.24095953 0 323 1 0.909108639 1 324 0 -1.38898408 0 @@ -410,7 +410,7 @@ Instance Label Score Assigned 408 0 -0.9617897 0 409 0 -1.22467291 0 410 0 -1.4406867 0 -411 0 +411 0 ? 0 412 1 1.77363062 1 413 0 -1.02494574 0 414 1 1.26032782 1 @@ -616,7 +616,7 @@ Instance Label Score Assigned 614 0 -1.408303 0 615 0 -1.0826714 0 616 0 -1.29266214 0 -617 0 +617 0 ? 0 618 0 -1.21864986 0 619 0 -1.14463758 0 620 0 -1.29266214 0 diff --git a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-CV-breast-cancer.txt b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-CV-breast-cancer.txt index 55744d3dbb..c3e87fdfec 100644 --- a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 7.01161575 0.999099433 0.0012998283148002473 1 35 0 -5.546275 0.0038867984 0.0056183906346792553 0 37 0 -0.627175331 0.3481513 0.61739094617065504 0 -40 0 +40 0 ? ? ? 
0 41 1 3.0365572 0.9541986 0.067638527006529184 1 44 1 6.61341858 0.998659551 0.0019351561965343684 1 45 0 -5.565003 0.00381495967 0.0055143486004666058 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -4.72209072 0.008818108 0.012778263407408707 0 141 0 -6.18090439 0.002064286 0.0029812133205916491 0 144 0 -5.546275 0.0038867984 0.0056183906346792553 0 -145 0 +145 0 ? ? ? 0 147 0 -5.305633 0.0049390397 0.0071431828757896592 0 150 0 -5.43029261 0.004362697 0.0063078105710409225 0 151 1 4.990402 0.993243039 0.0097813179599174817 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -6.58201027 0.00138314639 0.0019968397134892645 0 156 0 -5.6663394 0.00344857574 0.0049838416366178697 0 161 0 -3.992939 0.0181113519 0.026368671070599831 0 -164 0 +164 0 ? ? ? 0 167 1 6.79999542 0.9988875 0.0016059215653949079 1 169 0 -6.41288567 0.00163759827 0.0023644914709276374 0 171 0 -5.312752 0.004904177 0.007092637921430613 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 10.4554977 0.9999712 4.1534408054175585E-05 1 247 1 2.7647686 0.940742 0.088128954823708028 1 248 0 -3.52696 0.0285547972 0.041795476162409473 0 -249 0 +249 0 ? ? ? 0 250 0 -6.30096865 0.00183116761 0.0026442381917314273 0 252 0 4.46767044 0.988656163 6.4619475094968903 1 254 1 5.268241 0.994873762 0.007414618780919654 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -5.312752 0.004904177 0.007092637921430613 0 271 0 -3.70832729 0.02393173 0.034946036680259647 0 272 1 3.75608158 0.9771588 0.033335080773514915 1 -275 0 +275 0 ? ? ? 
0 276 0 -5.145169 0.005793728 0.0083828901690503332 0 277 0 -5.947381 0.002605866 0.0037643767357215252 0 278 0 -5.312752 0.004904177 0.007092637921430613 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -5.312752 0.004904177 0.007092637921430613 0 293 1 4.889887 0.992533863 0.0108117709104356 1 296 0 1.11456966 0.752980053 2.0173005524167049 1 -297 0 +297 0 ? ? ? 0 299 1 7.92421341 0.999638259 0.00052197576964742038 1 300 1 5.01128674 0.993381739 0.009579868932513079 1 301 0 -5.312752 0.004904177 0.007092637921430613 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 3.64061546 0.974434555 0.037362801100438832 1 317 1 8.889477 0.9998622 0.00019882564438936136 1 319 0 2.932189 0.9494149 4.3051439460642333 1 -321 0 +321 0 ? ? ? 0 323 1 5.11123276 0.994007468 0.0086714037349760222 1 327 0 -5.947381 0.002605866 0.0037643767357215252 0 328 1 3.24846554 0.962617934 0.054964793658751787 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 16.72465 0.99999994 8.5991327994145617E-08 1 613 0 -5.02813625 0.00650837226 0.0094202851039732072 0 614 0 -5.6638155 0.00345726055 0.0049964145907412149 0 -617 0 +617 0 ? ? ? 0 618 0 -4.744063 0.008628122 0.012501759455550775 0 619 0 -4.34295654 0.01283126 0.01863138517568386 0 621 0 0.373829842 0.5923841 1.2947177664921086 1 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -4.23449326 0.0142802689 0.020750590335179415 0 19 0 -3.10273981 0.04299438 0.063400697943788428 0 22 0 -5.130806 0.00587705 0.0085038042327941973 0 -23 1 +23 1 ? ? ? 
0 24 0 -5.932124 0.002645822 0.0038221729188529165 0 26 0 -5.0699935 0.006243238 0.0090353231046563232 0 27 0 -3.99905157 0.01800297 0.026209433910245255 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -5.12534046 0.00590906851 0.0085502708566916 0 135 0 -2.97098112 0.0487542 0.072109916649189282 0 136 0 -4.56492853 0.0103033585 0.014941712057931327 0 -139 0 +139 0 ? ? ? 0 140 0 -5.461241 0.00423030974 0.0061159922992663762 0 142 1 3.05590534 0.9550368 0.066371741280747978 1 143 0 -5.35564375 0.004699242 0.0067955523991724961 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -4.276688 0.0136983329 0.019899122722508662 0 155 1 1.77740765 0.8553765 0.22536855303918271 1 157 0 -5.366247 0.004649908 0.0067240441834850199 0 -158 0 +158 0 ? ? ? 0 159 1 8.96132851 0.999871731 0.0001850652016644095 1 160 1 6.856139 0.998948157 0.001518287560141836 1 162 0 -4.80037 0.008159574 0.011820066323398703 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.663686 0.9965423 0.0049970870434534837 1 232 0 0.928171158 0.71670413 1.8196185260804569 1 234 0 -3.391735 0.0325547643 0.047748098148187916 0 -235 0 +235 0 ? ? ? 0 236 1 7.88262749 0.9996229 0.00054416973071506056 1 238 1 8.651073 0.9998251 0.00025232061237670696 1 243 0 -4.03390265 0.0173970815 0.025319570708974903 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 10.9212322 0.99998194 2.6055606891255495E-05 1 287 0 -5.12534046 0.00590906851 0.0085502708566916 0 289 1 5.185871 0.9944361 0.0080494462952667781 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 
0 295 1 4.77450371 0.9916284 0.012128492162908918 1 298 0 -2.54918242 0.07248143 0.10855193009256793 0 302 1 11.1919193 0.999986231 1.9864132926342996E-05 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -5.932124 0.002645822 0.0038221729188529165 0 310 0 -5.691217 0.00336412713 0.0048615916252809817 0 313 0 -6.59299469 0.0013680571 0.0019750404849533288 0 -315 0 +315 0 ? ? ? 0 318 0 -5.91572762 0.00268944423 0.0038852748092412133 0 320 1 4.1229 0.9840607 0.023180779648038535 1 322 0 -4.80037 0.008159574 0.011820066323398703 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -6.26255941 0.00190273311 0.0027476785076986981 0 408 0 -4.386099 0.0122961234 0.017849522550602435 0 410 0 -6.26255941 0.00190273311 0.0027476785076986981 0 -411 0 +411 0 ? ? ? 0 412 1 7.037607 0.99912256 0.0012664339928697493 1 417 0 -6.26255941 0.00190273311 0.0027476785076986981 0 420 0 -3.338777 0.0342646 0.050300135586962377 0 diff --git a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-CV-breast-cancer.withThreshold.txt b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-CV-breast-cancer.withThreshold.txt index 0a4ce514a2..3b22d36b5d 100644 --- a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-CV-breast-cancer.withThreshold.txt +++ b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-CV-breast-cancer.withThreshold.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 7.01161575 0.999099433 0.0012998283148002473 1 35 0 -5.546275 0.0038867984 0.0056183906346792553 0 37 0 -0.627175331 0.3481513 0.61739094617065504 0 -40 0 +40 0 ? ? ? 
0 41 1 3.0365572 0.9541986 0.067638527006529184 1 44 1 6.61341858 0.998659551 0.0019351561965343684 1 45 0 -5.565003 0.00381495967 0.0055143486004666058 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -4.72209072 0.008818108 0.012778263407408707 0 141 0 -6.18090439 0.002064286 0.0029812133205916491 0 144 0 -5.546275 0.0038867984 0.0056183906346792553 0 -145 0 +145 0 ? ? ? 0 147 0 -5.305633 0.0049390397 0.0071431828757896592 0 150 0 -5.43029261 0.004362697 0.0063078105710409225 0 151 1 4.990402 0.993243039 0.0097813179599174817 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -6.58201027 0.00138314639 0.0019968397134892645 0 156 0 -5.6663394 0.00344857574 0.0049838416366178697 0 161 0 -3.992939 0.0181113519 0.026368671070599831 0 -164 0 +164 0 ? ? ? 0 167 1 6.79999542 0.9988875 0.0016059215653949079 1 169 0 -6.41288567 0.00163759827 0.0023644914709276374 0 171 0 -5.312752 0.004904177 0.007092637921430613 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 10.4554977 0.9999712 4.1534408054175585E-05 1 247 1 2.7647686 0.940742 0.088128954823708028 0 248 0 -3.52696 0.0285547972 0.041795476162409473 0 -249 0 +249 0 ? ? ? 0 250 0 -6.30096865 0.00183116761 0.0026442381917314273 0 252 0 4.46767044 0.988656163 6.4619475094968903 1 254 1 5.268241 0.994873762 0.007414618780919654 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -5.312752 0.004904177 0.007092637921430613 0 271 0 -3.70832729 0.02393173 0.034946036680259647 0 272 1 3.75608158 0.9771588 0.033335080773514915 1 -275 0 +275 0 ? ? ? 
0 276 0 -5.145169 0.005793728 0.0083828901690503332 0 277 0 -5.947381 0.002605866 0.0037643767357215252 0 278 0 -5.312752 0.004904177 0.007092637921430613 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -5.312752 0.004904177 0.007092637921430613 0 293 1 4.889887 0.992533863 0.0108117709104356 1 296 0 1.11456966 0.752980053 2.0173005524167049 0 -297 0 +297 0 ? ? ? 0 299 1 7.92421341 0.999638259 0.00052197576964742038 1 300 1 5.01128674 0.993381739 0.009579868932513079 1 301 0 -5.312752 0.004904177 0.007092637921430613 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 3.64061546 0.974434555 0.037362801100438832 1 317 1 8.889477 0.9998622 0.00019882564438936136 1 319 0 2.932189 0.9494149 4.3051439460642333 0 -321 0 +321 0 ? ? ? 0 323 1 5.11123276 0.994007468 0.0086714037349760222 1 327 0 -5.947381 0.002605866 0.0037643767357215252 0 328 1 3.24846554 0.962617934 0.054964793658751787 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 16.72465 0.99999994 8.5991327994145617E-08 1 613 0 -5.02813625 0.00650837226 0.0094202851039732072 0 614 0 -5.6638155 0.00345726055 0.0049964145907412149 0 -617 0 +617 0 ? ? ? 0 618 0 -4.744063 0.008628122 0.012501759455550775 0 619 0 -4.34295654 0.01283126 0.01863138517568386 0 621 0 0.373829842 0.5923841 1.2947177664921086 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -4.23449326 0.0142802689 0.020750590335179415 0 19 0 -3.10273981 0.04299438 0.063400697943788428 0 22 0 -5.130806 0.00587705 0.0085038042327941973 0 -23 1 +23 1 ? ? ? 
0 24 0 -5.932124 0.002645822 0.0038221729188529165 0 26 0 -5.0699935 0.006243238 0.0090353231046563232 0 27 0 -3.99905157 0.01800297 0.026209433910245255 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -5.12534046 0.00590906851 0.0085502708566916 0 135 0 -2.97098112 0.0487542 0.072109916649189282 0 136 0 -4.56492853 0.0103033585 0.014941712057931327 0 -139 0 +139 0 ? ? ? 0 140 0 -5.461241 0.00423030974 0.0061159922992663762 0 142 1 3.05590534 0.9550368 0.066371741280747978 1 143 0 -5.35564375 0.004699242 0.0067955523991724961 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -4.276688 0.0136983329 0.019899122722508662 0 155 1 1.77740765 0.8553765 0.22536855303918271 0 157 0 -5.366247 0.004649908 0.0067240441834850199 0 -158 0 +158 0 ? ? ? 0 159 1 8.96132851 0.999871731 0.0001850652016644095 1 160 1 6.856139 0.998948157 0.001518287560141836 1 162 0 -4.80037 0.008159574 0.011820066323398703 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.663686 0.9965423 0.0049970870434534837 1 232 0 0.928171158 0.71670413 1.8196185260804569 0 234 0 -3.391735 0.0325547643 0.047748098148187916 0 -235 0 +235 0 ? ? ? 0 236 1 7.88262749 0.9996229 0.00054416973071506056 1 238 1 8.651073 0.9998251 0.00025232061237670696 1 243 0 -4.03390265 0.0173970815 0.025319570708974903 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 10.9212322 0.99998194 2.6055606891255495E-05 1 287 0 -5.12534046 0.00590906851 0.0085502708566916 0 289 1 5.185871 0.9944361 0.0080494462952667781 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 
0 295 1 4.77450371 0.9916284 0.012128492162908918 1 298 0 -2.54918242 0.07248143 0.10855193009256793 0 302 1 11.1919193 0.999986231 1.9864132926342996E-05 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -5.932124 0.002645822 0.0038221729188529165 0 310 0 -5.691217 0.00336412713 0.0048615916252809817 0 313 0 -6.59299469 0.0013680571 0.0019750404849533288 0 -315 0 +315 0 ? ? ? 0 318 0 -5.91572762 0.00268944423 0.0038852748092412133 0 320 1 4.1229 0.9840607 0.023180779648038535 1 322 0 -4.80037 0.008159574 0.011820066323398703 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -6.26255941 0.00190273311 0.0027476785076986981 0 408 0 -4.386099 0.0122961234 0.017849522550602435 0 410 0 -6.26255941 0.00190273311 0.0027476785076986981 0 -411 0 +411 0 ? ? ? 0 412 1 7.037607 0.99912256 0.0012664339928697493 1 417 0 -6.26255941 0.00190273311 0.0027476785076986981 0 420 0 -3.338777 0.0342646 0.050300135586962377 0 diff --git a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-GaussianNorm-CV-breast-cancer.txt b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-GaussianNorm-CV-breast-cancer.txt index 2748bad72f..db7cabc84e 100644 --- a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-GaussianNorm-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-GaussianNorm-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 5.917115 0.9973143 0.003879895635620469 1 35 0 -5.023262 0.006539965 0.0094661631818091386 0 37 0 -1.38112783 0.200827926 0.3234219246434899 0 -40 0 +40 0 ? ? ? 
0 41 1 1.99977684 0.880773664 0.18315676412764836 1 44 1 6.1460495 0.997862637 0.0030868629883528767 1 45 0 -4.92853165 0.00718512246 0.010403360173725132 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -4.17745543 0.0151058007 0.021959341229968121 0 141 0 -5.50801945 0.00403775927 0.0058370475132605874 0 144 0 -5.023262 0.006539965 0.0094661631818091386 0 -145 0 +145 0 ? ? ? 0 147 0 -4.81479263 0.00804367848 0.011651498579728541 0 150 0 -4.92451668 0.00721382024 0.01044506254471955 0 151 1 4.048602 0.9828524 0.024953319944645107 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -5.80309772 0.00300910859 0.0043477707884351756 0 156 0 -4.933721 0.00714819832 0.010349705365439381 0 161 0 -3.81854677 0.0214878246 0.031338292831631254 0 -164 0 +164 0 ? ? ? 0 167 1 7.07203674 0.999152243 0.0012235733412714462 1 169 0 -5.652446 0.00349665456 0.0050534464360921993 0 171 0 -4.83358335 0.007895125 0.011435459622816726 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 9.218597 0.9999008 0.00014309666195095306 1 247 1 1.9463377 0.87504673 0.19256803177214576 1 248 0 -3.2993412 0.0355937965 0.052287163877458576 0 -249 0 +249 0 ? ? ? 0 250 0 -5.418478 0.004414317 0.006382611353481173 0 252 0 3.48067 0.970132768 5.0652926577610451 1 254 1 4.863985 0.9923395 0.011094325676462969 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -4.83358335 0.007895125 0.011435459622816726 0 271 0 -3.65327024 0.0252520833 0.036898928715096181 0 272 1 3.02218533 0.9535664 0.068594734962942897 1 -275 0 +275 0 ? ? ? 
0 276 0 -4.72818375 0.008765012 0.012700983189760973 0 277 0 -5.3183403 0.00487697963 0.00705320751084503 0 278 0 -4.83358335 0.007895125 0.011435459622816726 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -4.83358335 0.007895125 0.011435459622816726 0 293 1 3.6722765 0.975211561 0.036212866280089365 1 296 0 0.9506779 0.7212515 1.8429639897619017 1 -297 0 +297 0 ? ? ? 0 299 1 5.993658 0.9975117 0.0035943536619517991 1 300 1 5.41301346 0.9955616 0.0064175103553020287 1 301 0 -4.83358335 0.007895125 0.011435459622816726 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 2.857479 0.945704 0.080539421519246923 1 317 1 7.483817 0.9994382 0.00081069597738820146 1 319 0 2.14308548 0.895020843 3.2518251711052208 1 -321 0 +321 0 ? ? ? 0 323 1 4.38443756 0.987683654 0.017879061305842645 1 327 0 -5.3183403 0.00487697963 0.00705320751084503 0 328 1 1.9153614 0.8716203 0.19822830198625993 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 15.0148582 0.9999997 4.2995669122556443E-07 1 613 0 -4.76532364 0.008448151 0.012239880753341762 0 614 0 -5.114196 0.005974896 0.00864580721752186 0 -617 0 +617 0 ? ? ? 0 618 0 -4.43310547 0.0117381271 0.017034712310540854 0 619 0 -4.138027 0.0157037526 0.022835500585175472 0 621 0 -0.147964478 0.463076234 0.89721082981393785 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -4.094967 0.0163834114 0.023832028779250634 0 19 0 -3.158533 0.0407563634 0.06003080556474015 0 22 0 -4.786547 0.008272208 0.011983909171361835 0 -23 1 +23 1 ? ? ? 0 24 0 -5.49961758 0.00407168828 0.0058861960152521283 0 26 0 -4.637971 0.009584561 0.013894290826829943 0 27 0 -3.85011339 0.02083403 0.030374676365783088 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -4.786547 0.008272208 0.011983909171361835 0 135 0 -2.91646814 0.0513454638 0.07604528657651706 0 136 0 -4.31833 0.01314697 0.019092852440504127 0 -139 0 +139 0 ? ? ? 
0 140 0 -5.00991058 0.006627286 0.009592975357514695 0 142 1 2.566639 0.9286834 0.10674124733849408 1 143 0 -4.68576 0.009141385 0.013248879501449318 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -3.91140127 0.0196197983 0.028586744893093978 0 155 1 1.73489761 0.8500378 0.23440107518588552 1 157 0 -5.03140068 0.00648729829 0.0093896829482070059 0 -158 0 +158 0 ? ? ? 0 159 1 8.209714 0.9997281 0.00039234577165867646 1 160 1 6.214038 0.9980028 0.0028841924162386562 1 162 0 -4.563184 0.0103211654 0.014967669647872791 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.128908 0.994111836 0.0085199331359509835 1 232 0 0.387944221 0.5957877 1.3068148861794051 1 234 0 -3.05071926 0.04518643 0.066709024779774861 0 -235 0 +235 0 ? ? ? 0 236 1 7.45407867 0.999421239 0.00083521748895634131 1 238 1 8.351737 0.9997641 0.00034039381967173406 1 243 0 -3.79230738 0.02204652 0.032162254419967784 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 10.2351351 0.9999641 5.1767706679585828E-05 1 287 0 -4.786547 0.008272208 0.011983909171361835 0 289 1 5.007719 0.993358254 0.0096139756423385195 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 4.08445454 0.9834463 0.024081816959951673 1 298 0 -2.520249 0.07445079 0.11161839829444895 0 302 1 10.2925406 0.999966145 4.8843899665517181E-05 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -5.49961758 0.00407168828 0.0058861960152521283 0 310 0 -5.254764 0.00519544445 0.0075149807235167431 0 313 0 -5.94634438 0.00260856142 0.0037682756571934027 0 -315 0 +315 0 ? ? ? 
0 318 0 -5.49961758 0.00407168828 0.0058861960152521283 0 320 1 3.84560966 0.9790739 0.030510363432469476 1 322 0 -4.563184 0.0103211654 0.014967669647872791 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -5.722981 0.003259291 0.00470984242704696 0 408 0 -3.98740244 0.0182100739 0.026513731287650329 0 410 0 -5.722981 0.003259291 0.00470984242704696 0 -411 0 +411 0 ? ? ? 0 412 1 5.972435 0.997458458 0.0036713375244963246 1 417 0 -5.722981 0.003259291 0.00470984242704696 0 420 0 -3.14838743 0.0411548652 0.060630273661156198 0 diff --git a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-GaussianNorm-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-GaussianNorm-TrainTest-breast-cancer.txt index c81880f594..3e137abda1 100644 --- a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-GaussianNorm-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-GaussianNorm-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 5.678712 0.9965937 0.0049226209615764287 1 21 1 6.001464 0.997531 0.0035664232425213858 1 22 0 -5.02218437 0.00654697046 0.009476336418019371 0 -23 1 +23 1 ? ? ? 0 24 0 -5.557753 0.00384261133 0.0055543948499928858 0 25 1 0.7356682 0.676047862 0.56480270753052697 1 26 0 -5.01491547 0.006594418 0.0095452416271748757 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.86437464 0.134193972 0.20788424941725006 0 38 1 4.26738262 0.986175358 0.020083890425044392 1 39 1 0.948868752 0.7208876 0.47215375855428471 1 -40 0 +40 0 ? ? ? 
0 41 1 2.21941185 0.9019792 0.14883391736209772 1 42 1 6.171118 0.997915447 0.00301051350343931 1 43 1 -0.406847954 0.399668157 1.3231254618912542 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -4.573848 0.0102128033 0.014809714349895977 0 137 0 -5.38329029 0.00457168929 0.0066106759896893642 0 138 0 -4.32377768 0.0130764758 0.018989799023276886 0 -139 0 +139 0 ? ? ? 0 140 0 -5.38329029 0.00457168929 0.0066106759896893642 0 141 0 -5.83162737 0.00292472052 0.0042256622529479159 0 142 1 3.1618576 0.9593734 0.059835633747694031 1 143 0 -4.83655453 0.007871887 0.011401667353256314 0 144 0 -5.470522 0.004191393 0.00605960993587892 0 -145 0 +145 0 ? ? ? 0 146 1 0.0557060242 0.5139229 0.96037607240274647 1 147 0 -5.47995234 0.00415221555 0.0060028519951750492 0 148 0 -1.59599066 0.168542728 0.26628596824729922 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.20912266 0.901065767 0.15029568586304221 1 156 0 -5.58346558 0.003745433 0.0054136621278108544 0 157 0 -5.109416 0.006003351 0.0086871066895572498 0 -158 0 +158 0 ? ? ? 0 159 1 9.879018 0.99994874 7.3954435339176224E-05 1 160 1 6.966673 0.9990581 0.0013594752512588889 1 161 0 -4.01959 0.0176434461 0.025681337989317106 0 162 0 -4.661079 0.009367671 0.01357839101244592 0 163 0 -3.39304066 0.03251367 0.04768681882905737 0 -164 0 +164 0 ? ? ? 0 165 0 -3.65873766 0.0251178537 0.036700273114887771 0 166 1 6.106388 0.9977764 0.0032115643460085509 1 167 1 6.96164227 0.999053359 0.0013663610592643267 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.434880257 0.607038438 1.3475398955993996 1 233 1 4.62145329 0.9902574 0.014124543390739288 1 234 0 -3.19655 0.0392957628 0.057835744186683809 0 -235 0 +235 0 ? ? ? 
0 236 1 9.488486 0.9999243 0.00010921311695177715 1 237 1 5.185233 0.994432569 0.0080545481788837064 1 238 1 10.3383093 0.999967635 4.6694045347237877E-05 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 8.706185 0.9998345 0.00023881767606976273 1 247 1 1.85991 0.865286469 0.20875025156266316 1 248 0 -3.24698353 0.03743543 0.055044774760243595 0 -249 0 +249 0 ? ? ? 0 250 0 -5.9445715 0.00261317822 0.0037749537255417041 0 251 1 6.851535 0.998943269 0.001525346290754842 1 252 0 3.15871048 0.959250569 4.6170762873605469 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.54124355 0.8236454 0.27990471757503477 1 273 1 -0.485281944 0.381005645 1.3921157227274092 0 274 0 -4.467927 0.0113409776 0.016455057773140165 0 -275 0 +275 0 ? ? ? 0 276 0 -5.02218437 0.00654697046 0.009476336418019371 0 277 0 -5.918859 0.00268105837 0.0038731439944547637 0 278 0 -5.557753 0.00384261133 0.0055543948499928858 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 6.391654 0.9983273 0.0024151950596217639 1 290 0 -6.27996445 0.00186996383 0.0027003130775063206 0 291 0 -5.557753 0.00384261133 0.0055543948499928858 0 -292 1 +292 1 ? ? ? 0 293 1 4.062559 0.98308605 0.024610393646554929 1 -294 0 +294 0 ? ? ? 0 295 1 5.48979473 0.9958883 0.0059441683338054239 1 296 0 0.7984762 0.689648449 1.6880247409542737 1 -297 0 +297 0 ? ? ? 0 298 0 -2.82211971 0.05614051 0.083355987427050499 0 299 1 5.92269325 0.9973292 0.0038583400725341575 1 300 1 5.553643 0.9961416 0.0055772429838620297 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 2.505681 0.924539149 0.11319368429557292 1 313 0 -6.27996445 0.00186996383 0.0027003130775063206 0 314 0 -5.996913 0.00248024915 0.0035826879721537609 0 -315 0 +315 0 ? ? ? 
0 316 1 2.06765842 0.8877198 0.17182370265537225 1 317 1 6.799261 0.998886645 0.0016071267852858751 1 318 0 -5.323591 0.004851562 0.0070163583406618974 0 319 0 1.16352749 0.7619731 2.0708033667370818 1 320 1 5.47767448 0.995838344 0.006016528395062894 1 -321 0 +321 0 ? ? ? 0 322 0 -4.661079 0.009367671 0.01357839101244592 0 323 1 3.7019043 0.975917757 0.035168522105486828 1 324 0 -5.557753 0.00384261133 0.0055543948499928858 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -4.06660128 0.0168468487 0.024511924018010906 0 409 0 -4.77211475 0.008391453 0.012157388278315496 0 410 0 -5.918859 0.00268105837 0.0038731439944547637 0 -411 0 +411 0 ? ? ? 0 412 1 6.93106556 0.999024 0.0014087955764285343 1 413 0 -3.5143342 0.02890712 0.042318806680098388 0 414 1 4.808298 0.9919043 0.0117271336749766 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -5.548576 0.00387790077 0.0056055040477979955 0 615 0 -4.073707 0.0167295579 0.024339819893712551 0 616 0 -5.02218437 0.00654697046 0.009476336418019371 0 -617 0 +617 0 ? ? ? 0 618 0 -4.573848 0.0102128033 0.014809714349895977 0 619 0 -4.12551069 0.0158983991 0.023120824431946189 0 620 0 -5.02218437 0.00654697046 0.009476336418019371 0 diff --git a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.txt index 4789a49c72..51deeac562 100644 --- a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 7.041827 0.999126256 0.0012610978584151201 1 21 1 7.16006565 0.9992236 0.0011205580676841706 1 22 0 -5.39027166 0.00454002852 0.0065647900946516503 0 -23 1 +23 1 ? ? ? 
0 24 0 -6.16573143 0.00209578 0.003026744365055557 0 25 1 1.140585 0.757787049 0.4001356116356401 1 26 0 -5.39192247 0.00453257374 0.006553986122637143 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.74917936 0.148150727 0.23132991391158644 0 38 1 5.25044537 0.9947822 0.0075473881419006704 1 39 1 1.14784718 0.7591175 0.39760491464649406 1 -40 0 +40 0 ? ? ? 0 41 1 2.83779049 0.9446841 0.082096136496848479 1 42 1 5.84602737 0.997117 0.0041653216557672994 1 43 1 -0.115566254 0.471140563 1.0857705467412089 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -4.81180668 0.008067538 0.011686200181083801 0 137 0 -5.771742 0.003104659 0.0044860436134326774 0 138 0 -4.708121 0.008941052 0.01295722303877558 0 -139 0 +139 0 ? ? ? 0 140 0 -5.771742 0.003104659 0.0044860436134326774 0 141 0 -6.35020638 0.00174334215 0.0025173059685722476 0 142 1 3.59143162 0.9731803 0.039220987499558525 1 143 0 -5.32383156 0.00485040154 0.0070146760362167319 0 144 0 -5.96873665 0.00255094632 0.0036849396417204128 0 -145 0 +145 0 ? ? ? 0 146 1 -0.228344917 0.443160534 1.1740986883981319 0 147 0 -6.02899933 0.00240211817 0.0034696929544304877 0 148 0 -1.17236042 0.236428589 0.38916500660996345 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.561039 0.928311646 0.10731887720885433 1 156 0 -6.32817459 0.0017821081 0.0025733321754180501 0 157 0 -5.587267 0.00373127544 0.0053931605303660906 0 -158 0 +158 0 ? ? ? 0 159 1 9.957122 0.9999526 6.836472348564471E-05 1 160 1 7.81574535 0.999596834 0.00058176260649806227 1 161 0 -4.194911 0.0148482891 0.02158218175164682 0 162 0 -5.008802 0.00663458835 0.0096035809636014526 0 163 0 -4.450941 0.0115330191 0.016735320470494343 0 -164 0 +164 0 ? ? ? 
0 165 0 -3.79370117 0.022016488 0.0321179521831969 0 166 1 6.78795338 0.998874 0.0016253773809814112 1 167 1 6.650817 0.9987087 0.0018641198790525675 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.440856934 0.6084632 1.3527802863415537 1 233 1 5.59169 0.996285141 0.0053693888063565016 1 234 0 -3.380312 0.0329164639 0.048287580681296273 0 -235 0 +235 0 ? ? ? 0 236 1 9.42349148 0.9999192 0.00011660894974341421 1 237 1 5.527053 0.9960381 0.005727196256675989 1 238 1 10.3295126 0.999967337 4.7124015954602722E-05 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 9.522981 0.999926865 0.00010551521477366078 1 247 1 2.22113419 0.9021314 0.14859054441592434 1 248 0 -3.25320721 0.037211813 0.0547096542674552 0 -249 0 +249 0 ? ? ? 0 250 0 -6.70964432 0.00121761323 0.0017577148937104486 0 251 1 8.275113 0.9997453 0.00036748773339945429 1 252 0 3.75820923 0.97720623 5.4552166396980901 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.25216866 0.777675033 0.36276067331990891 1 273 1 -0.4770708 0.382944047 1.3847944817499098 0 274 0 -4.773376 0.008380964 0.01214212852075712 0 -275 0 +275 0 ? ? ? 0 276 0 -5.39027166 0.00454002852 0.0065647900946516503 0 277 0 -6.547201 0.00143207016 0.0020675212884715419 0 278 0 -6.16573143 0.00209578 0.003026744365055557 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 7.07017231 0.999150634 0.0012258970788924348 1 290 0 -6.928671 0.0009783435 0.0014121422633917285 0 291 0 -6.16573143 0.00209578 0.003026744365055557 0 -292 1 +292 1 ? ? ? 0 293 1 5.206727 0.9945503 0.0078837745923848179 1 -294 0 +294 0 ? ? ? 0 295 1 6.0751133 0.9977059 0.0033135223937081622 1 296 0 1.04628563 0.740061 1.9437549070362801 1 -297 0 +297 0 ? ? ? 
0 298 0 -2.41310167 0.08217907 0.12371538777024385 0 299 1 7.396323 0.999386847 0.00088486407096525718 1 300 1 5.69218445 0.996639132 0.0048568731086720204 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 3.90498447 0.9802564 0.02876897031816629 1 313 0 -6.928671 0.0009783435 0.0014121422633917285 0 314 0 -6.664193 0.00127416 0.0018393963949252371 0 -315 0 +315 0 ? ? ? 0 316 1 2.20020485 0.9002679 0.15157371666802275 1 317 1 7.75578 0.9995719 0.00061772192605728435 1 318 0 -5.81475639 0.002974334 0.0042974510254079716 0 319 0 1.38163567 0.7992536 2.3165538563686012 1 320 1 5.837037 0.997091 0.0042029227668842466 1 -321 0 +321 0 ? ? ? 0 322 0 -5.008802 0.00663458835 0.0096035809636014526 0 323 1 4.26100159 0.9860881 0.020211552173447847 1 324 0 -6.16573143 0.00209578 0.003026744365055557 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -4.4899044 0.0110971872 0.016099351806882618 0 409 0 -5.286586 0.00503353868 0.0072801992795113032 0 410 0 -6.547201 0.00143207016 0.0020675212884715419 0 -411 0 +411 0 ? ? ? 0 412 1 7.575248 0.9994873 0.00073988707471124668 1 413 0 -3.748186 0.0230181254 0.033596298028071243 0 414 1 5.274519 0.994905651 0.0073683771133106393 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -6.085728 0.00226994278 0.0032785576690230068 0 615 0 -4.604435 0.0099082 0.01436579907268937 0 616 0 -5.39027166 0.00454002852 0.0065647900946516503 0 -617 0 +617 0 ? ? ? 
0 618 0 -4.81180668 0.008067538 0.011686200181083801 0 619 0 -4.233342 0.0142964814 0.020774319020048612 0 620 0 -5.39027166 0.00454002852 0.0065647900946516503 0 diff --git a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.withThreshold.txt b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.withThreshold.txt index 92afd941f0..a1bd6936bc 100644 --- a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.withThreshold.txt +++ b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.withThreshold.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 7.041827 0.999126256 0.0012610978584151201 1 21 1 7.16006565 0.9992236 0.0011205580676841706 1 22 0 -5.39027166 0.00454002852 0.0065647900946516503 0 -23 1 +23 1 ? ? ? 0 24 0 -6.16573143 0.00209578 0.003026744365055557 0 25 1 1.140585 0.757787049 0.4001356116356401 0 26 0 -5.39192247 0.00453257374 0.006553986122637143 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.74917936 0.148150727 0.23132991391158644 0 38 1 5.25044537 0.9947822 0.0075473881419006704 1 39 1 1.14784718 0.7591175 0.39760491464649406 0 -40 0 +40 0 ? ? ? 0 41 1 2.83779049 0.9446841 0.082096136496848479 0 42 1 5.84602737 0.997117 0.0041653216557672994 1 43 1 -0.115566254 0.471140563 1.0857705467412089 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -4.81180668 0.008067538 0.011686200181083801 0 137 0 -5.771742 0.003104659 0.0044860436134326774 0 138 0 -4.708121 0.008941052 0.01295722303877558 0 -139 0 +139 0 ? ? ? 0 140 0 -5.771742 0.003104659 0.0044860436134326774 0 141 0 -6.35020638 0.00174334215 0.0025173059685722476 0 142 1 3.59143162 0.9731803 0.039220987499558525 1 143 0 -5.32383156 0.00485040154 0.0070146760362167319 0 144 0 -5.96873665 0.00255094632 0.0036849396417204128 0 -145 0 +145 0 ? ? ? 
0 146 1 -0.228344917 0.443160534 1.1740986883981319 0 147 0 -6.02899933 0.00240211817 0.0034696929544304877 0 148 0 -1.17236042 0.236428589 0.38916500660996345 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.561039 0.928311646 0.10731887720885433 0 156 0 -6.32817459 0.0017821081 0.0025733321754180501 0 157 0 -5.587267 0.00373127544 0.0053931605303660906 0 -158 0 +158 0 ? ? ? 0 159 1 9.957122 0.9999526 6.836472348564471E-05 1 160 1 7.81574535 0.999596834 0.00058176260649806227 1 161 0 -4.194911 0.0148482891 0.02158218175164682 0 162 0 -5.008802 0.00663458835 0.0096035809636014526 0 163 0 -4.450941 0.0115330191 0.016735320470494343 0 -164 0 +164 0 ? ? ? 0 165 0 -3.79370117 0.022016488 0.0321179521831969 0 166 1 6.78795338 0.998874 0.0016253773809814112 1 167 1 6.650817 0.9987087 0.0018641198790525675 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.440856934 0.6084632 1.3527802863415537 0 233 1 5.59169 0.996285141 0.0053693888063565016 1 234 0 -3.380312 0.0329164639 0.048287580681296273 0 -235 0 +235 0 ? ? ? 0 236 1 9.42349148 0.9999192 0.00011660894974341421 1 237 1 5.527053 0.9960381 0.005727196256675989 1 238 1 10.3295126 0.999967337 4.7124015954602722E-05 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 9.522981 0.999926865 0.00010551521477366078 1 247 1 2.22113419 0.9021314 0.14859054441592434 0 248 0 -3.25320721 0.037211813 0.0547096542674552 0 -249 0 +249 0 ? ? ? 0 250 0 -6.70964432 0.00121761323 0.0017577148937104486 0 251 1 8.275113 0.9997453 0.00036748773339945429 1 252 0 3.75820923 0.97720623 5.4552166396980901 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.25216866 0.777675033 0.36276067331990891 0 273 1 -0.4770708 0.382944047 1.3847944817499098 0 274 0 -4.773376 0.008380964 0.01214212852075712 0 -275 0 +275 0 ? ? ? 
0 276 0 -5.39027166 0.00454002852 0.0065647900946516503 0 277 0 -6.547201 0.00143207016 0.0020675212884715419 0 278 0 -6.16573143 0.00209578 0.003026744365055557 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 7.07017231 0.999150634 0.0012258970788924348 1 290 0 -6.928671 0.0009783435 0.0014121422633917285 0 291 0 -6.16573143 0.00209578 0.003026744365055557 0 -292 1 +292 1 ? ? ? 0 293 1 5.206727 0.9945503 0.0078837745923848179 1 -294 0 +294 0 ? ? ? 0 295 1 6.0751133 0.9977059 0.0033135223937081622 1 296 0 1.04628563 0.740061 1.9437549070362801 0 -297 0 +297 0 ? ? ? 0 298 0 -2.41310167 0.08217907 0.12371538777024385 0 299 1 7.396323 0.999386847 0.00088486407096525718 1 300 1 5.69218445 0.996639132 0.0048568731086720204 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 3.90498447 0.9802564 0.02876897031816629 1 313 0 -6.928671 0.0009783435 0.0014121422633917285 0 314 0 -6.664193 0.00127416 0.0018393963949252371 0 -315 0 +315 0 ? ? ? 0 316 1 2.20020485 0.9002679 0.15157371666802275 0 317 1 7.75578 0.9995719 0.00061772192605728435 1 318 0 -5.81475639 0.002974334 0.0042974510254079716 0 319 0 1.38163567 0.7992536 2.3165538563686012 0 320 1 5.837037 0.997091 0.0042029227668842466 1 -321 0 +321 0 ? ? ? 0 322 0 -5.008802 0.00663458835 0.0096035809636014526 0 323 1 4.26100159 0.9860881 0.020211552173447847 1 324 0 -6.16573143 0.00209578 0.003026744365055557 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -4.4899044 0.0110971872 0.016099351806882618 0 409 0 -5.286586 0.00503353868 0.0072801992795113032 0 410 0 -6.547201 0.00143207016 0.0020675212884715419 0 -411 0 +411 0 ? ? ? 
0 412 1 7.575248 0.9994873 0.00073988707471124668 1 413 0 -3.748186 0.0230181254 0.033596298028071243 0 414 1 5.274519 0.994905651 0.0073683771133106393 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -6.085728 0.00226994278 0.0032785576690230068 0 615 0 -4.604435 0.0099082 0.01436579907268937 0 616 0 -5.39027166 0.00454002852 0.0065647900946516503 0 -617 0 +617 0 ? ? ? 0 618 0 -4.81180668 0.008067538 0.011686200181083801 0 619 0 -4.233342 0.0142964814 0.020774319020048612 0 620 0 -5.39027166 0.00454002852 0.0065647900946516503 0 diff --git a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-bin-norm-CV-breast-cancer.txt b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-bin-norm-CV-breast-cancer.txt index 946584d7cb..6f9ac3e2b6 100644 --- a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-bin-norm-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-bin-norm-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 4.455554 0.98851943 0.016658771299360001 1 35 0 -5.20966339 0.005433825 0.0078607284616035548 0 37 0 -1.95550251 0.1239546 0.19092245979854006 0 -40 0 +40 0 ? ? ? 0 41 1 1.363338 0.796301663 0.32861302447496599 1 44 1 2.93189621 0.949400842 0.074910765340690438 1 45 0 -5.4227767 0.00439546537 0.0063552935777515963 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -4.283581 0.01360552 0.019763368834071083 0 141 0 -5.903559 0.0027222808 0.0039327765009182425 0 144 0 -5.20966339 0.005433825 0.0078607284616035548 0 -145 0 +145 0 ? ? ? 
0 147 0 -4.808229 0.008096219 0.011727915268260366 0 150 0 -4.955528 0.00699508563 0.010127237230146099 0 151 1 2.9794178 0.9516356 0.071518853245330763 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -6.343319 0.00175536959 0.0025346883174766728 0 156 0 -4.870145 0.00761383865 0.011026477645071437 0 161 0 -3.876839 0.0202957559 0.029581804866342248 0 -164 0 +164 0 ? ? ? 0 167 1 1.63665962 0.8370799 0.25656278900168672 1 169 0 -6.067718 0.00231109979 0.0033380709957833601 0 171 0 -4.955528 0.00699508563 0.010127237230146099 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 6.373105 0.9982961 0.0024603307167519865 1 247 1 1.08481979 0.747405 0.42003789409759523 1 248 0 -3.67766953 0.0246584155 0.036020526273661445 0 -249 0 +249 0 ? ? ? 0 250 0 -5.56404066 0.00381861837 0.0055196472135604522 0 252 0 2.67256546 0.935388267 3.9520600195019679 1 254 1 2.64337635 0.933601558 0.099121125939627047 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -4.955528 0.00699508563 0.010127237230146099 0 271 0 -3.63624859 0.0256744642 0.037524217400354012 0 272 1 2.26432514 0.905879 0.14260970234578763 1 -275 0 +275 0 ? ? ? 0 276 0 -5.20966339 0.005433825 0.0078607284616035548 0 277 0 -5.6494236 0.003507201 0.0050687153327134771 0 278 0 -4.955528 0.00699508563 0.010127237230146099 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -4.955528 0.00699508563 0.010127237230146099 0 293 1 2.05892181 0.886846 0.17324448166666195 1 296 0 1.99554062 0.880328059 3.0628431682446857 1 -297 0 +297 0 ? ? ? 0 299 1 3.69336033 0.9757162 0.035466552037373179 1 300 1 4.02837372 0.9825081 0.025458759934045509 1 301 0 -4.955528 0.00699508563 0.010127237230146099 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 2.94207382 0.9498875 0.074171415588786135 1 317 1 4.312872 0.986782 0.019196674360521217 1 319 0 2.41989422 0.9183318 3.6140817927103961 1 -321 0 +321 0 ? 
? ? 0 323 1 3.56970787 0.972607434 0.040070476517068486 1 327 0 -5.6494236 0.003507201 0.0050687153327134771 0 328 1 1.70264626 0.845880032 0.24147502978012392 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 7.478863 0.9994354 0.00081473984708772015 1 613 0 -4.91670847 0.00726995664 0.010526641032229434 0 614 0 -5.20966339 0.005433825 0.0078607284616035548 0 -617 0 +617 0 ? ? ? 0 618 0 -4.769904 0.008409867 0.01218417928555986 0 619 0 -4.330144 0.0129945707 0.018870074173474232 0 621 0 -0.284356117 0.429386139 0.80941330227154873 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -3.99180722 0.0181314889 0.026398258875503611 0 19 0 -3.49091744 0.0295717642 0.043306566793261343 0 22 0 -4.74231339 0.008643099 0.012523555687056959 0 -23 1 +23 1 ? ? ? 0 24 0 -4.99358654 0.00673562335 0.0097503246472711684 0 26 0 -4.230923 0.01433061 0.020824270758417435 0 27 0 -3.74053383 0.0231908429 0.0338513700113664 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -4.74265528 0.00864017 0.012519293184591884 0 135 0 -2.40284228 0.08295622 0.12493748028384198 0 136 0 -4.24142361 0.0141830426 0.020608297294341209 0 -139 0 +139 0 ? ? ? 0 140 0 -4.99192953 0.006746718 0.0097664397610658042 0 142 1 1.883389 0.8679999 0.20423319963131603 1 143 0 -3.93245554 0.0192188919 0.027996904861385053 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -3.71085787 0.02387269 0.034858773809671437 0 155 1 2.1317234 0.8939485 0.1617363815870177 1 157 0 -4.492697 0.0110665858 0.016054708591916572 0 -158 0 +158 0 ? ? ? 
0 159 1 7.019025 0.9991061 0.0012901886373564615 1 160 1 5.00210667 0.9933211 0.0096679074407147667 1 162 0 -4.492697 0.0110665858 0.016054708591916572 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 4.06135273 0.983066 0.02463978413547805 1 232 0 0.582720757 0.641693234 1.4807328100816015 1 234 0 -2.76217723 0.0594026 0.08835075047841523 0 -235 0 +235 0 ? ? ? 0 236 1 5.42756176 0.995625436 0.0063250060270779312 1 238 1 6.00247669 0.9975335 0.0035628026721853763 1 243 0 -3.18360639 0.039787326 0.058574116415727635 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 7.122633 0.999194 0.0011632435375058345 1 287 0 -4.74265528 0.00864017 0.012519293184591884 0 289 1 5.24414349 0.994749367 0.0075950186704868833 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 3.85042763 0.979172349 0.030365276805999493 1 298 0 -2.97889614 0.0483884327 0.07155528637722855 0 302 1 7.28154945 0.999312341 0.00099242318488484949 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -4.99358654 0.00673562335 0.0097503246472711684 0 310 0 -4.74265528 0.00864017 0.012519293184591884 0 313 0 -5.49281931 0.004099349 0.005926265742136525 0 -315 0 +315 0 ? ? ? 0 318 0 -4.994271 0.006731047 0.0097436773674529319 0 320 1 3.98940563 0.9818257 0.026461150371251282 1 322 0 -4.492697 0.0110665858 0.016054708591916572 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -5.243203 0.00525554 0.0076021355607147146 0 408 0 -2.50673819 0.07538716 0.11307869372093513 0 410 0 -5.243203 0.00525554 0.0076021355607147146 0 -411 0 +411 0 ? ? ? 
0 412 1 4.41673565 0.9880704 0.017314215991460075 1 417 0 -5.243203 0.00525554 0.0076021355607147146 0 420 0 -2.416037 0.08195794 0.12336783697816152 0 diff --git a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-bin-norm-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-bin-norm-TrainTest-breast-cancer.txt index fc9c09274b..27120c9ca1 100644 --- a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-bin-norm-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-bin-norm-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 3.60211229 0.973457634 0.038809901663734138 1 21 1 4.16481066 0.9847049 0.022236640568523543 1 22 0 -5.531984 0.003942524 0.0056991017626305011 0 -23 1 +23 1 ? ? ? 0 24 0 -5.641634 0.00353453052 0.0051082826744376919 0 25 1 1.07475233 0.7454996 0.42372049536525624 1 26 0 -5.05423546 0.00634177 0.0091783743058498103 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -2.63470244 0.06693815 0.099955373797007113 0 38 1 3.11990023 0.9577062 0.062344933763709205 1 39 1 1.3332 0.791369438 0.33757674381727276 1 -40 0 +40 0 ? ? ? 0 41 1 1.76657629 0.8540314 0.22763900783247884 1 42 1 5.77468061 0.996904433 0.0044728861234643764 1 43 1 -0.160024166 0.4600791 1.1200461625954479 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -4.98228741 0.00681164 0.0098607417433267518 0 137 0 -5.97203064 0.00254257885 0.0036728371153443573 0 138 0 -4.3374877 0.0129007176 0.018732896643962105 0 -139 0 +139 0 ? ? ? 0 140 0 -5.97203064 0.00254257885 0.0036728371153443573 0 141 0 -5.97203064 0.00254257885 0.0036728371153443573 0 142 1 3.62155676 0.973955452 0.03807230821683618 1 143 0 -4.06941748 0.0168002676 0.024443571870852193 0 144 0 -5.531984 0.003942524 0.0056991017626305011 0 -145 0 +145 0 ? ? ? 
0 146 1 0.845272541 0.69957453 0.51545032982981998 1 147 0 -5.264244 0.00514667667 0.0074442579843589417 0 148 0 -3.53539133 0.0283218417 0.041449555087460649 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.63197279 0.9328912 0.1002192761113103 1 156 0 -4.95913267 0.00697009 0.010090922390098837 0 157 0 -5.091937 0.006108559 0.0088398149066821179 0 -158 0 +158 0 ? ? ? 0 159 1 7.637605 0.9995183 0.00069514934090023593 1 160 1 5.102776 0.9939569 0.00874476585784471 1 161 0 -4.27338028 0.013743096 0.019964600611336557 0 162 0 -5.091937 0.006108559 0.0088398149066821179 0 163 0 -3.67683315 0.0246785376 0.036050290649489301 0 -164 0 +164 0 ? ? ? 0 165 0 -3.45594358 0.0305921026 0.044824259180207618 0 166 1 4.88854 0.9925239 0.010826239558709188 1 167 1 2.59618044 0.930615366 0.10374308707957469 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.465872765 0.6144064 1.3748470144034199 1 233 1 3.621717 0.9739595 0.038066304453576054 1 234 0 -3.43166828 0.0313202776 0.045908353174351067 0 -235 0 +235 0 ? ? ? 0 236 1 6.647678 0.9987047 0.001869974861439897 1 237 1 4.42246 0.9881377 0.017215962974508822 1 238 1 6.44672251 0.9984168 0.0022859116128351314 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 6.46864176 0.998451054 0.0022363890442921863 1 247 1 1.5234189 0.8210414 0.28447311579312373 1 248 0 -4.00451946 0.0179065578 0.026067797200325149 0 -249 0 +249 0 ? ? ? 0 250 0 -5.39917946 0.00449994765 0.0065067030783201637 0 251 1 4.104112 0.983763337 0.023616805142920666 1 252 0 2.75052214 0.939942837 4.0575198580425882 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.62500715 0.8354845 0.2593150232737797 1 273 1 0.228233814 0.556812048 0.84473766721050514 1 274 0 -4.82307673 0.007977848 0.011555758238394069 0 -275 0 +275 0 ? ? ? 
0 276 0 -5.531984 0.003942524 0.0056991017626305011 0 277 0 -6.0816803 0.00227912888 0.003291840622802274 0 278 0 -5.641634 0.00353453052 0.0051082826744376919 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 5.68027353 0.996598959 0.004915027880645002 1 290 0 -6.521727 0.00146896509 0.0021208267355545722 0 291 0 -5.641634 0.00353453052 0.0051082826744376919 0 -292 1 +292 1 ? ? ? 0 293 1 2.365685 0.9141729 0.12946106222132911 1 -294 0 +294 0 ? ? ? 0 295 1 4.049787 0.982872367 0.024924010557578272 1 296 0 1.7836585 0.856148 2.7973428610745215 1 -297 0 +297 0 ? ? ? 0 298 0 -3.845192 0.0209346656 0.030522958750931656 0 299 1 2.974937 0.95142895 0.071832170969213474 1 300 1 4.1471324 0.9844364 0.022630102350657457 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.437553883 0.607676 0.71862571139451958 1 313 0 -6.521727 0.00146896509 0.0021208267355545722 0 314 0 -6.0816803 0.00227912888 0.003291840622802274 0 -315 0 +315 0 ? ? ? 0 316 1 3.03096247 0.953953445 0.068009233653927595 1 317 1 3.97896051 0.9816384 0.026736450275361392 1 318 0 -5.641634 0.00353453052 0.0051082826744376919 0 319 0 1.61403608 0.833970964 2.5904925250318187 1 320 1 5.17757845 0.99439 0.0081162910482616441 1 -321 0 +321 0 ? ? ? 0 322 0 -5.091937 0.006108559 0.0088398149066821179 0 323 1 3.22697878 0.961837 0.056135679390163992 1 324 0 -5.641634 0.00353453052 0.0051082826744376919 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -2.996994 0.0475618578 0.070302697725435973 0 409 0 -4.887184 0.007486166 0.010840883662554117 0 410 0 -6.0816803 0.00227912888 0.003291840622802274 0 -411 0 +411 0 ? ? ? 
0 412 1 4.5649085 0.989696443 0.014942001227255185 1 413 0 -3.34774446 0.0339691 0.049858759088118251 0 414 1 3.84952021 0.9791539 0.030392501392593196 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -5.531984 0.003942524 0.0056991017626305011 0 615 0 -3.69268847 0.02429977 0.035490127405006046 0 616 0 -5.531984 0.003942524 0.0056991017626305011 0 -617 0 +617 0 ? ? ? 0 618 0 -4.98228741 0.00681164 0.0098607417433267518 0 619 0 -4.43259048 0.0117441025 0.01704343535923129 0 620 0 -5.531984 0.003942524 0.0056991017626305011 0 diff --git a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-non-negative-CV-breast-cancer.txt b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-non-negative-CV-breast-cancer.txt index c637c69d47..777b0c9e2e 100644 --- a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-non-negative-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-non-negative-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 7.291877 0.999319434 0.00098218321186686951 1 35 0 -5.869519 0.002816277 0.0040687610645090766 0 37 0 -1.02334213 0.2643769 0.44296132773823416 0 -40 0 +40 0 ? ? ? 0 41 1 2.78505516 0.9418629 0.086411051010362436 1 44 1 7.821086 0.999599 0.00057866567352766714 1 45 0 -5.776886 0.00308877858 0.0044630618032660361 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -4.724151 0.008800117 0.01275207813790242 0 141 0 -6.55827045 0.00141632778 0.0020447774455947432 0 144 0 -5.869519 0.002816277 0.0040687610645090766 0 -145 0 +145 0 ? ? ? 
0 147 0 -5.710598 0.00329976762 0.0047684300667823433 0 150 0 -5.60911131 0.003650946 0.0052768404018184306 0 151 1 5.118675 0.994051635 0.0086073014434059885 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -6.9242 0.000982723 0.0014184666437026428 0 156 0 -5.82082272 0.00295639853 0.0042714987401086318 0 161 0 -4.27493 0.0137221068 0.019933897944770858 0 -164 0 +164 0 ? ? ? 0 167 1 8.728978 0.9998382 0.00023348534112652949 1 169 0 -6.667038 0.00127054506 0.0018341745113224558 0 171 0 -5.546697 0.00388516486 0.0056160247410976614 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 11.3055124 0.9999877 1.7714321792245208E-05 1 247 1 2.306137 0.909384 0.1370384544406823 1 248 0 -3.57901382 0.027145749 0.039704412233602736 0 -249 0 +249 0 ? ? ? 0 250 0 -6.50957441 0.00148689921 0.0021467385111997687 0 252 0 4.227894 0.9856265 6.1204466632636887 1 254 1 5.1967 0.9944957 0.0079629764358839714 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -5.546697 0.00388516486 0.0056160247410976614 0 271 0 -4.08297968 0.01657771 0.024117038815588843 0 272 1 4.140359 0.984332263 0.022782712303270784 1 -275 0 +275 0 ? ? ? 0 276 0 -5.50359 0.004055611 0.0058629065586406487 0 277 0 -6.235449 0.00195492059 0.002823114622841595 0 278 0 -5.546697 0.00388516486 0.0056160247410976614 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -5.546697 0.00388516486 0.0056160247410976614 0 293 1 4.409566 0.9879857 0.017437977320811163 1 296 0 1.50613689 0.818488 2.4618631795162846 1 -297 0 +297 0 ? ? ? 0 299 1 7.93633366 0.9996426 0.00051569614495754321 1 300 1 7.85100842 0.9996108 0.00056163266102194308 1 301 0 -5.546697 0.00388516486 0.0056160247410976614 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 3.280983 0.9637706 0.05323826093940856 1 317 1 9.331164 0.999911368 0.00012787476790525535 1 319 0 2.956067 0.9505495 4.3378705942158717 1 -321 0 +321 0 ? ? 
? 0 323 1 5.71478939 0.996714 0.0047485078589307303 1 327 0 -6.235449 0.00195492059 0.002823114622841595 0 328 1 1.37423134 0.7980629 0.32542559954710693 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 18.5483551 1 0 1 613 0 -5.86728668 0.00282255374 0.0040778419569695535 0 614 0 -5.931934 0.00264632422 0.0038228993852335589 0 -617 0 +617 0 ? ? ? 0 618 0 -5.1376605 0.00583713735 0.0084458830416492391 0 619 0 -4.771732 0.00839464 0.012162025043583415 0 621 0 0.3424616 0.5847883 1.2680810757859888 1 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -4.595036 0.01000083 0.014500778622910757 0 19 0 -3.32248163 0.03480794 0.051112046016137037 0 22 0 -5.58065033 0.00375595246 0.0054288957255828087 0 -23 1 +23 1 ? ? ? 0 24 0 -6.503868 0.00149539544 0.0021590142749859306 0 26 0 -5.548387 0.00387863023 0.0056065605311466102 0 27 0 -4.308096 0.0132804094 0.01928794203033625 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -5.58065033 0.00375595246 0.0054288957255828087 0 135 0 -3.21473217 0.038615074 0.056813911931726302 0 136 0 -4.944373 0.007072995 0.010240432848577178 0 -139 0 +139 0 ? ? ? 0 140 0 -5.929988 0.00265146513 0.0038303358354253489 0 142 1 3.36657238 0.966643333 0.048944424422816823 1 143 0 -5.92445755 0.0026661302 0.0038515494632620751 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -4.654668 0.009427353 0.013665310686569912 0 155 1 1.85960007 0.865250349 0.20881047650865889 1 157 0 -5.867591 0.00282169762 0.0040766033438554457 0 -158 0 +158 0 ? ? ? 0 159 1 9.591543 0.9999317 9.8549424786689596E-05 1 160 1 7.363619 0.9993665 0.00091420540184684017 1 162 0 -5.231313 0.00531806657 0.0076928219174680146 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.973879 0.9974621 0.0036660786976757281 1 232 0 1.10679436 0.751531 2.008862256839882 1 234 0 -3.7641573 0.0226616841 0.033070042201646284 0 -235 0 +235 0 ? ? ? 
0 236 1 8.480452 0.9997926 0.00029928085261473974 1 238 1 9.448584 0.9999212 0.00011368501131740873 1 243 0 -4.52710867 0.0106962454 0.015514542524251164 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 11.9692574 0.9999937 9.115109290810302E-06 1 287 0 -5.58065033 0.00375595246 0.0054288957255828087 0 289 1 5.61943245 0.9963864 0.005222752234881816 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 5.155699 0.9942666 0.008295308421383105 1 298 0 -2.71633816 0.0620161332 0.092364986128816551 0 302 1 12.1483784 0.9999947 7.6532482629398447E-06 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -6.503868 0.00149539544 0.0021590142749859306 0 310 0 -6.216928 0.001991392 0.00287583593610438 0 313 0 -7.2025423 0.000744136 0.0010739609416730365 0 -315 0 +315 0 ? ? ? 0 318 0 -6.503868 0.00149539544 0.0021590142749859306 0 320 1 4.269491 0.9862041 0.02004186218454097 1 322 0 -5.231313 0.00531806657 0.0076928219174680146 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -6.853205 0.00105495122 0.0015227762658880663 0 408 0 -4.852546 0.00774797052 0.011221486878031036 0 410 0 -6.853205 0.00105495122 0.0015227762658880663 0 -411 0 +411 0 ? ? ? 
0 412 1 7.587702 0.9994936 0.00073076734716613276 1 417 0 -6.853205 0.00105495122 0.0015227762658880663 0 420 0 -3.639381 0.0255962238 0.037408370576337828 0 diff --git a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-non-negative-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-non-negative-TrainTest-breast-cancer.txt index 3c3d96d974..157c16ab10 100644 --- a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-non-negative-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-non-negative-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 6.59719467 0.9986377 0.0019667577188335239 1 21 1 6.9093914 0.999002635 0.0014396108762453246 1 22 0 -5.50264263 0.00405943953 0.0058684526611357517 0 -23 1 +23 1 ? ? ? 0 24 0 -6.14459133 0.002140461 0.0030913421022840675 0 25 1 0.9981127 0.7306873 0.4526739227114121 1 26 0 -5.56536674 0.00381357735 0.0055123467049374769 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.79816341 0.142074779 0.22107619020053021 0 38 1 4.925646 0.9927943 0.010433298358924143 1 39 1 0.895828247 0.7100915 0.49392321501990555 1 -40 0 +40 0 ? ? ? 0 41 1 2.72234154 0.93833214 0.091829412491125792 1 42 1 6.45601654 0.998431444 0.0022647243580549464 1 43 1 -0.5776577 0.359471738 1.4760497444221736 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -4.965955 0.006923029 0.010022553277705419 0 137 0 -5.93407059 0.00264069065 0.0038147503192492542 0 138 0 -4.668453 0.009299485 0.013479092815885742 0 -139 0 +139 0 ? ? ? 0 140 0 -5.93407059 0.00264069065 0.0038147503192492542 0 141 0 -6.47075844 0.00154565845 0.0022316389032971582 0 142 1 3.64823437 0.9746237 0.037082819238846716 1 143 0 -5.466771 0.00420707744 0.0060823331671915537 0 144 0 -6.03933048 0.002377488 0.0034340739321673348 0 -145 0 +145 0 ? ? ? 
0 146 1 -0.110341072 0.4724427 1.0817887726099731 0 147 0 -6.11751556 0.002199078 0.0031760924132441036 0 148 0 -1.55709267 0.174064219 0.2758984827724662 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.36491013 0.9141121 0.12955701132443528 1 156 0 -6.267907 0.00189260417 0.002733037741461919 0 157 0 -5.607903 0.003655344 0.0052832085489667481 0 -158 0 +158 0 ? ? ? 0 159 1 10.9176846 0.9999819 2.614159977229895E-05 1 160 1 7.75989342 0.9995737 0.00061514108379339587 1 161 0 -4.309515 0.0132618267 0.019260772318565499 0 162 0 -5.07121468 0.006235666 0.0090243302579491296 0 163 0 -3.76092339 0.02273342 0.033175939048272665 0 -164 0 +164 0 ? ? ? 0 165 0 -3.924346 0.0193723515 0.028222656292276067 0 166 1 6.74228859 0.998821437 0.001701309224545778 1 167 1 7.94067574 0.99964416 0.0005134595729021004 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.7225275 0.673163354 1.6133583443719168 1 233 1 5.29774761 0.995022058 0.0071995859979441072 1 234 0 -3.554101 0.027811477 0.040691992083778071 0 -235 0 +235 0 ? ? ? 0 236 1 10.4206543 0.9999702 4.2996303413732479E-05 1 237 1 5.828807 0.997067034 0.0042375925496323883 1 238 1 11.3587952 0.9999883 1.6854398235588073E-05 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 9.599171 0.9999322 9.7775450197580527E-05 1 247 1 2.067935 0.887747347 0.17177895049852415 1 248 0 -3.4094696 0.0320008248 0.046922276673100262 0 -249 0 +249 0 ? ? ? 0 250 0 -6.699335 0.0012302153 0.0017759181240158319 0 251 1 7.9959507 0.9996633 0.00048584679587356108 1 252 0 3.6111412 0.9736899 5.2482402117884597 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.72366619 0.848600447 0.23684265601696217 1 273 1 -0.342484474 0.4152061 1.2681004399882052 0 274 0 -4.846204 0.007796883 0.011292605602753639 0 -275 0 +275 0 ? ? ? 
0 276 0 -5.50264263 0.00405943953 0.0058684526611357517 0 277 0 -6.57601929 0.0013914461 0.0020088303041534395 0 278 0 -6.14459133 0.002140461 0.0030913421022840675 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 7.101452 0.999176741 0.0011882013529343118 1 290 0 -7.007447 0.0009042981 0.0013052166206428517 0 291 0 -6.14459133 0.002140461 0.0030913421022840675 0 -292 1 +292 1 ? ? ? 0 293 1 4.6855 0.9908563 0.013252265463070151 1 -294 0 +294 0 ? ? ? 0 295 1 6.2168417 0.99800843 0.0028760930785605436 1 296 0 1.02090549 0.735148966 1.9167469555439196 1 -297 0 +297 0 ? ? ? 0 298 0 -3.02001572 0.0465297773 0.068740211727588499 0 299 1 7.09773827 0.9991737 0.0011925905306341051 1 300 1 6.64691544 0.998703659 0.001871438610749495 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 2.90730953 0.9482066 0.076726654530730437 1 313 0 -7.007447 0.0009042981 0.0013052166206428517 0 314 0 -6.649353 0.001293185 0.0018668790298154393 0 -315 0 +315 0 ? ? ? 0 316 1 2.12067223 0.892896235 0.16343556787762994 1 317 1 7.5715065 0.9994854 0.00074264021133653045 1 318 0 -5.92458725 0.00266578537 0.0038510506602204577 0 319 0 1.35382557 0.7947544 2.2845767018893883 1 320 1 5.988476 0.9974988 0.0036129742419820916 1 -321 0 +321 0 ? ? ? 0 322 0 -5.07121468 0.006235666 0.0090243302579491296 0 323 1 4.28225327 0.986376643 0.019789456912749203 1 324 0 -6.14459133 0.002140461 0.0030913421022840675 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -4.58592224 0.0100914678 0.014632868867702545 0 409 0 -5.205141 0.00545831956 0.0078962603748794508 0 410 0 -6.57601929 0.0013914461 0.0020088303041534395 0 -411 0 +411 0 ? ? ? 
0 412 1 7.70175171 0.9995482 0.00065196153701951257 1 413 0 -3.70033741 0.02411908 0.035222977147123087 0 414 1 5.41945839 0.99559 0.006376396588275309 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -6.112665 0.00220974674 0.003191518193469859 0 615 0 -4.37095165 0.012481451 0.018120247723782505 0 616 0 -5.50264263 0.00405943953 0.0058684526611357517 0 -617 0 +617 0 ? ? ? 0 618 0 -4.965955 0.006923029 0.010022553277705419 0 619 0 -4.429267 0.0117827384 0.017099838701332234 0 620 0 -5.50264263 0.00405943953 0.0058684526611357517 0 diff --git a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-norm-CV-breast-cancer.txt b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-norm-CV-breast-cancer.txt index 4a5221f10c..ca68c5efee 100644 --- a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-norm-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-norm-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 4.28022528 0.9863494 0.019829298264044417 1 35 0 -4.29419041 0.0134638669 0.019556202484947195 0 37 0 -1.95759869 0.123727158 0.19054794774570458 0 -40 0 +40 0 ? ? ? 0 41 1 1.34579754 0.7934417 0.33380384997661361 1 44 1 5.22175264 0.9946311 0.0077665361772382407 1 45 0 -4.55969143 0.0103568994 0.015019761369081966 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -3.50701833 0.0291131958 0.042624993467054707 0 141 0 -4.510108 0.0108776484 0.015779105577069866 0 144 0 -4.29419041 0.0134638669 0.019556202484947195 0 -145 0 +145 0 ? ? ? 
0 147 0 -4.118532 0.0160079524 0.023281438797448952 0 150 0 -4.43784046 0.0116833262 0.016954714516170224 0 151 1 2.86513376 0.9460957 0.079941963974142813 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -4.792855 0.008220622 0.011908867541972762 0 156 0 -4.152423 0.0154827805 0.022511655407603286 0 161 0 -3.361469 0.0335215963 0.049190599682179155 0 -164 0 +164 0 ? ? ? 0 167 1 3.00652266 0.9528679 0.069651835538374979 1 169 0 -4.76463461 0.008453925 0.012248282163630898 0 171 0 -4.36102 0.0126044592 0.01829996516606797 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 7.761687 0.9995744 0.00061410874818056485 1 247 1 2.24705172 0.904395938 0.14497358262585658 1 248 0 -2.75623083 0.05973572 0.088861781570551343 0 -249 0 +249 0 ? ? ? 0 250 0 -4.36834049 0.0125136767 0.018167327858314108 0 252 0 2.47272253 0.9222073 3.6842213333365441 1 254 1 5.30986643 0.9950817 0.0071130806432035459 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -4.36102 0.0126044592 0.01829996516606797 0 271 0 -3.23003125 0.0380511023 0.055967840193562365 0 272 1 1.97665262 0.878323853 0.18717511071531998 1 -275 0 +275 0 ? ? ? 0 276 0 -4.011443 0.0177852046 0.025889540399059593 0 277 0 -4.57693768 0.010181617 0.014764258553784819 0 278 0 -4.36102 0.0126044592 0.01829996516606797 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -4.36102 0.0126044592 0.01829996516606797 0 293 1 3.50341845 0.9707849 0.042776422691022505 1 296 0 0.854511261 0.701512635 1.7442582303419563 1 -297 0 +297 0 ? ? ? 0 299 1 4.02451658 0.9824417 0.025556263019683039 1 300 1 4.165831 0.9847203 0.02221411037855156 1 301 0 -4.36102 0.0126044592 0.01829996516606797 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 2.756329 0.940269768 0.088853362072482428 1 317 1 5.80636454 0.9970007 0.004333585623359653 1 319 0 1.0757966 0.7456977 1.975383454841898 1 -321 0 +321 0 ? ? ? 
0 323 1 3.082415 0.956161559 0.064673689924879513 1 327 0 -4.57693768 0.010181617 0.014764258553784819 0 328 1 2.805962 0.942997158 0.084674672619554123 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 9.154173 0.999894261 0.00015255667702522208 1 613 0 -3.88851619 0.0200648643 0.029241838018723456 0 614 0 -4.371011 0.0124807227 0.018119183737570529 0 -617 0 +617 0 ? ? ? 0 618 0 -3.72869563 0.0234605316 0.034249740786795546 0 619 0 -3.44594836 0.0308899172 0.045267541796380997 0 621 0 -0.782016754 0.3138854 0.54347850919806506 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -3.6590457 0.0251103118 0.036689112229595131 0 19 0 -2.927483 0.0508115776 0.075233591312011905 0 22 0 -4.14026976 0.0156691261 0.022784748886103094 0 -23 1 +23 1 ? ? ? 0 24 0 -4.75638962 0.008523319 0.012349253326730132 0 26 0 -3.93837214 0.0191076845 0.027833331950969452 0 27 0 -3.40870714 0.03202445 0.046957488715061987 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -4.14026976 0.0156691261 0.022784748886103094 0 135 0 -2.62711 0.06741392 0.1006911970043132 0 136 0 -3.77448845 0.0224339925 0.032733975062274864 0 -139 0 +139 0 ? ? ? 0 140 0 -4.255713 0.0139846308 0.020317960583175827 0 142 1 1.9862442 0.879345238 0.18549840430247841 1 143 0 -3.74331784 0.02312786 0.033758349764067219 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -3.267953 0.0366871059 0.053923618256310278 0 155 1 1.88906479 0.8686489 0.20315494639328185 1 157 0 -4.39060831 0.0122414771 0.017769705367455636 0 -158 0 +158 0 ? ? ? 
0 159 1 6.88715744 0.9989802 0.001471976257367614 1 160 1 5.57244873 0.996213257 0.0054734847881935346 1 162 0 -4.024827 0.0175529048 0.025548374564996635 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 4.531541 0.989350557 0.015446292097008558 1 232 0 0.251759052 0.562609434 1.1930059918223119 1 234 0 -2.51516485 0.0748018846 0.11216576762239737 0 -235 0 +235 0 ? ? ? 0 236 1 5.772835 0.9968987 0.0044811669482472481 1 238 1 6.65452766 0.9987135 0.0018572316949005939 1 243 0 -3.28154564 0.036209736 0.053208867556931244 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 8.09413 0.999694765 0.00044042879818134223 1 287 0 -4.14026976 0.0156691261 0.022784748886103094 0 289 1 4.30420876 0.9866685 0.019362604522295365 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 3.601099 0.973431468 0.038848681676236112 1 298 0 -2.22490883 0.09753586 0.14805849380018501 0 302 1 8.018049 0.9996706 0.00047526633912005399 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -4.75638962 0.008523319 0.012349253326730132 0 310 0 -4.506051 0.0109213842 0.015842898366274911 0 313 0 -4.987275 0.00677798 0.0098118479335557937 0 -315 0 +315 0 ? ? ? 0 318 0 -4.75638962 0.008523319 0.012349253326730132 0 320 1 2.81792259 0.943636656 0.08369663336324043 1 322 0 -4.024827 0.0175529048 0.025548374564996635 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -4.87183237 0.00760109862 0.011007956774122585 0 408 0 -3.31706572 0.03499035 0.051384728090851442 0 410 0 -4.87183237 0.00760109862 0.011007956774122585 0 -411 0 +411 0 ? ? ? 
0 412 1 5.23884 0.9947216 0.007635302704268032 1 417 0 -4.87183237 0.00760109862 0.011007956774122585 0 420 0 -2.70138979 0.0628913939 0.093711836328273818 0 diff --git a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-norm-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-norm-TrainTest-breast-cancer.txt index ff73fc5189..1f68afb149 100644 --- a/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-norm-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/LogisticRegression/LogisticRegression-norm-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 3.96267843 0.9813426 0.027171185979008489 1 21 1 5.265054 0.9948575 0.0074382155674881083 1 22 0 -4.51155567 0.0108620832 0.015756402974357524 0 -23 1 +23 1 ? ? ? 0 24 0 -5.107146 0.00601691334 0.0087067913827365544 0 25 1 0.6726794 0.6621029 0.59487269328676762 1 26 0 -4.36255264 0.0125854 0.018272117354929882 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -2.04001045 0.115065664 0.17635768683092809 0 38 1 3.6194644 0.973902345 0.038150977482211336 1 39 1 0.673018932 0.6621788 0.59470724065030856 1 -40 0 +40 0 ? ? ? 0 41 1 1.75375986 0.8524264 0.23035280375662126 1 42 1 5.60796976 0.9963449 0.0052828205055256471 1 43 1 -0.5184488 0.37321502 1.4219210475645763 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -4.10382557 0.0162412636 0.023623552162962799 0 137 0 -4.73142624 0.008736885 0.01266004645418743 0 138 0 -3.874074 0.0203508064 0.029662873497674963 0 -139 0 +139 0 ? ? ? 0 140 0 -4.73142624 0.008736885 0.01266004645418743 0 141 0 -5.13915634 0.00582846347 0.0084332958699167727 0 142 1 2.53462648 0.9265339 0.11008436686792368 1 143 0 -4.1287837 0.0158472732 0.02304587572398243 0 144 0 -4.919286 0.00725137955 0.010499643937654568 0 -145 0 +145 0 ? ? ? 
0 146 1 0.185849667 0.546329141 0.87215771785369722 1 147 0 -4.93812275 0.007117027 0.010304411526052911 0 148 0 -2.08798218 0.110270388 0.16856112656874939 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.24833822 0.9045071 0.14479626651911345 1 156 0 -4.913893 0.007290303 0.010556209568305783 0 157 0 -4.69941568 0.009018519 0.013069997580773615 0 -158 0 +158 0 ? ? ? 0 159 1 8.213823 0.9997292 0.0003907114930137318 1 160 1 6.409885 0.9983575 0.0023716112041259128 1 161 0 -3.70981741 0.0238969475 0.034894625912887001 0 162 0 -4.29168558 0.0134971775 0.019604916230060426 0 163 0 -4.20999336 0.0146292737 0.021261482597556794 0 -164 0 +164 0 ? ? ? 0 165 0 -3.35529184 0.0337223038 0.049490233625727506 0 166 1 5.13335562 0.9941378 0.0084822193431077515 1 167 1 4.31503153 0.986810148 0.019155543364578753 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.387140751 0.5955942 1.3061245037640643 1 233 1 4.22282743 0.9855546 0.020992330945643928 1 234 0 -2.79013348 0.0578596778 0.085986144354393876 0 -235 0 +235 0 ? ? ? 0 236 1 7.198673 0.999253 0.0010781320275837761 1 237 1 4.24964952 0.9859315 0.020440656601130012 1 238 1 7.93097258 0.9996407 0.00051844885378563097 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 8.043116 0.9996788 0.00046348169408102081 1 247 1 2.170814 0.8975978 0.15585897141376154 1 248 0 -3.01326466 0.0468302034 0.069194857755858583 0 -249 0 +249 0 ? ? ? 0 250 0 -5.133764 0.005859794 0.0084787621145538308 0 251 1 5.4693675 0.9958038 0.0060666126630314994 1 252 0 2.521253 0.925618351 3.7489094481346643 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.26803923 0.7804069 0.35770157547115156 1 273 1 -0.341918468 0.415343523 1.2676230380461255 0 274 0 -4.11754751 0.01602347 0.023304190486054636 0 -275 0 +275 0 ? ? ? 
0 276 0 -4.51155567 0.0108620832 0.015756402974357524 0 277 0 -5.327016 0.00483505568 0.0069924288920724343 0 278 0 -5.107146 0.00601691334 0.0087067913827365544 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 5.13687468 0.9941583 0.0084524642023233402 1 290 0 -5.54688644 0.00388443237 0.005614963867042919 0 291 0 -5.107146 0.00601691334 0.0087067913827365544 0 -292 1 +292 1 ? ? ? 0 293 1 3.96790457 0.981438041 0.027030903251222888 1 -294 0 +294 0 ? ? ? 0 295 1 4.55162954 0.9895601 0.015140724489837425 1 296 0 0.6700988 0.661525249 1.5628798789895393 1 -297 0 +297 0 ? ? ? 0 298 0 -2.40630245 0.08269336 0.12452401321321592 0 299 1 4.497334 0.988984048 0.015980843395464565 1 300 1 4.62145948 0.990257442 0.014124456553393392 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 1.84912252 0.864024043 0.21085663625525078 1 313 0 -5.54688644 0.00388443237 0.005614963867042919 0 314 0 -5.36138344 0.004672473 0.0067567510712633252 0 -315 0 +315 0 ? ? ? 0 316 1 2.20764112 0.900933564 0.15050737158003294 1 317 1 5.890983 0.997243345 0.0039825045312741787 1 318 0 -5.00404263 0.006666029 0.0096492437155483984 0 319 0 0.6469283 0.656317949 1.540853588056835 1 320 1 3.99139547 0.9818612 0.026409039376297374 1 -321 0 +321 0 ? ? ? 0 322 0 -4.29168558 0.0134971775 0.019604916230060426 0 323 1 3.157802 0.959215045 0.060073807756361522 1 324 0 -5.107146 0.00601691334 0.0087067913827365544 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -3.57900143 0.0271460768 0.039704898382857935 0 409 0 -4.281804 0.0136293843 0.019798272915656116 0 410 0 -5.327016 0.00483505568 0.0069924288920724343 0 -411 0 +411 0 ? ? ? 
0 412 1 6.193191 0.997960865 0.0029448527311883681 1 413 0 -3.24647379 0.0374538042 0.055072312735252281 0 414 1 4.55400229 0.9895846 0.015105009633995839 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -4.95365334 0.00700811762 0.010146170993954725 0 615 0 -3.64432216 0.0254732724 0.037226341208279841 0 616 0 -4.51155567 0.0108620832 0.015756402974357524 0 -617 0 +617 0 ? ? ? 0 618 0 -4.10382557 0.0162412636 0.023623552162962799 0 619 0 -3.696096 0.0242191125 0.035370869193417386 0 620 0 -4.51155567 0.0108620832 0.015756402974357524 0 diff --git a/test/BaselineOutput/SingleDebug/NAReplace/featurized.tsv b/test/BaselineOutput/SingleDebug/NAReplace/featurized.tsv index f9ba7bf9f4..5ca9707e3e 100644 --- a/test/BaselineOutput/SingleDebug/NAReplace/featurized.tsv +++ b/test/BaselineOutput/SingleDebug/NAReplace/featurized.tsv @@ -1,14 +1,13 @@ #@ TextLoader{ #@ header+ #@ sep=tab -#@ col=A:TX:0 -#@ col=B:R4:1 -#@ col=C:R8:2 -#@ col=D:TX:3-6 -#@ col=E:R4:7-10 +#@ col=A:R4:0 +#@ col=B:R8:1 +#@ col=C:R4:2-5 +#@ col=D:R8:6-9 #@ } -A B C 8 0:"" -5 5 5 5 1 1 1 5 1 1 1 -5 5 5 5 4 4 5 5 4 4 5 -3 3 3 3 1 1 1 3 1 1 1 -6 6 6 6 8 8 1 6 8 8 1 +A B 8 0:"" +5 5 5 1 1 1 5 1 1 1 +5 5 5 4 4 5 5 4 4 5 +3 3 3 1 1 1 3 1 1 1 +6 6 6 8 8 1 6 8 8 1 diff --git a/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-CV-breast-cancer.txt b/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-CV-breast-cancer.txt index 7cfbe77b4a..cb14fcc5fe 100644 --- a/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 0.8960438 0.7101358 0.49383312022081494 1 35 0 -1.47149158 0.186716 0.298168872554196 0 37 0 -1.077555 0.253969 0.42269254099397019 0 -40 0 +40 0 ? ? ? 
0 41 1 -0.110265851 0.472461432 1.0817315302478872 0 44 1 1.38623238 0.799990058 0.32194602419012386 1 45 0 -1.49285364 0.1834938 0.29246424185161274 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -1.329715 0.2092065 0.33862709419515019 0 141 0 -1.51333463 0.18044512 0.28708753442168577 0 144 0 -1.47149158 0.186716 0.298168872554196 0 -145 0 +145 0 ? ? ? 0 147 0 -1.36272907 0.203797117 0.3287919993163349 0 150 0 -1.490515 0.183844447 0.29308395009459498 0 151 1 0.488354921 0.6197188 0.69031438298058734 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -1.54631257 0.175619483 0.27861768553552435 0 156 0 -1.34795034 0.206205666 0.33316283039965772 0 161 0 -1.29642129 0.214767918 0.34880897708810421 0 -164 0 +164 0 ? ? ? 0 167 1 0.7817354 0.686054 0.54360597630084673 1 169 0 -1.52040219 0.1794023 0.28525299602441972 0 171 0 -1.46262646 0.188065946 0.3005655398637459 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 2.30008841 0.908884346 0.13783136852928218 1 247 1 0.283135176 0.5703147 0.81016986229628651 1 248 0 -1.02733588 0.263600916 0.44144026210768955 0 -249 0 +249 0 ? ? ? 0 250 0 -1.3897934 0.199440747 0.32091990872905618 0 252 0 0.575369835 0.6400013 1.4739363860396681 1 254 1 1.36448383 0.796487451 0.32827646395944027 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -1.46262646 0.188065946 0.3005655398637459 0 271 0 -1.330715 0.209041134 0.33832542505991492 0 272 1 0.3417368 0.58461237 0.77444774011663531 1 -275 0 +275 0 ? ? ? 0 276 0 -1.43851376 0.1917756 0.30717219734355156 0 277 0 -1.50446939 0.181759879 0.28940381583950159 0 278 0 -1.46262646 0.188065946 0.3005655398637459 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -1.46262646 0.188065946 0.3005655398637459 0 293 1 0.690094 0.665987849 0.58643223892348229 1 296 0 0.122398376 0.530561447 1.090991766168943 1 -297 0 +297 0 ? ? ? 
0 299 1 0.8090725 0.6919118 0.53133991521179469 1 300 1 1.07804394 0.7461236 0.42251343016986392 1 301 0 -1.46262646 0.188065946 0.3005655398637459 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 0.633879662 0.653368652 0.61403085786632716 1 317 1 1.75457263 0.852528632 0.2301798078479787 1 319 0 0.1361382 0.5339821 1.1015427183713855 1 -321 0 +321 0 ? ? ? 0 323 1 0.8411846 0.6987147 0.51722465664834216 1 327 0 -1.50446939 0.181759879 0.28940381583950159 0 328 1 0.758497 0.681027353 0.55421535102487163 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 2.80018759 0.942685962 0.085150850944065576 1 613 0 -1.31009483 0.212470978 0.34459500515903269 0 614 0 -1.49938011 0.18251799 0.29074111357003468 0 -617 0 +617 0 ? ? ? 0 618 0 -1.40553582 0.196939126 0.31641874253214702 0 619 0 -1.37255788 0.2022069 0.32591343959007929 0 621 0 -0.609831333 0.35209766 0.62615172722986678 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -1.29125416 0.2156406 0.35041324245427652 0 19 0 -1.18682528 0.233827218 0.38425832019246531 0 22 0 -1.38527489 0.200163171 0.32222238184143043 0 -23 1 +23 1 ? ? ? 0 24 0 -1.44789767 0.19032532 0.30458573193898042 0 26 0 -1.321662 0.210541919 0.341065431407162 0 27 0 -1.28084588 0.217406273 0.35366454853946033 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -1.36420059 0.203558445 0.3283595980104454 0 135 0 -0.9835708 0.272183836 0.45835400169661483 0 136 0 -1.33306038 0.2086536 0.33761874206668624 0 -139 0 +139 0 ? ? ? 0 140 0 -1.42708111 0.1935539 0.31034997353186378 0 142 1 0.5200758 0.6271655 0.67308190344279184 1 143 0 -1.07713079 0.254049361 0.42284792669686094 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -1.147677 0.240913659 0.39766410362601251 0 155 1 0.775262833 0.6846582 0.54654409786019487 1 157 0 -1.39568317 0.198502019 0.31922920878555094 0 -158 0 +158 0 ? ? ? 
0 159 1 2.4702127 0.922027051 0.11711901622998303 1 160 1 1.8089292 0.8592324 0.21887965615084526 1 162 0 -1.34346867 0.206940219 0.33449847387485032 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 1.80702519 0.859001935 0.21926671447669044 1 232 0 -0.09305465 0.476753116 0.93443627885443414 0 234 0 -0.7166654 0.3281277 0.57374106872220532 0 -235 0 +235 0 ? ? ? 0 236 1 2.0308764 0.884000957 0.1778801634200064 1 238 1 2.5700233 0.9289072 0.10639359546534205 1 243 0 -0.9099059 0.2870191 0.48806467358561068 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 3.03633 0.954188645 0.067653576940859037 1 287 0 -1.36420059 0.203558445 0.3283595980104454 0 289 1 1.839293 0.8628651 0.21279308434601185 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 1.50403929 0.81817615 0.28951661133253503 1 298 0 -1.01809835 0.265398 0.44496526256029006 0 302 1 2.95740485 0.9506123 0.07307101604489151 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -1.44789767 0.19032532 0.30458573193898042 0 310 0 -1.4164151 0.1952242 0.31334116395889644 0 313 0 -1.53151011 0.17777285 0.28239108395025142 0 -315 0 +315 0 ? ? ? 0 318 0 -1.38467479 0.200259253 0.32239570012101715 0 320 1 1.03225207 0.737352252 0.43957409861257096 1 322 0 -1.34346867 0.206940219 0.33449847387485032 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -1.48970389 0.183966175 0.29329914054619283 0 408 0 -0.969954848 0.2748895 0.46372722780050113 0 410 0 -1.48970389 0.183966175 0.29329914054619283 0 -411 0 +411 0 ? ? ? 
0 412 1 1.784699 0.8562761 0.22385204529486596 1 417 0 -1.48970389 0.183966175 0.29329914054619283 0 420 0 -0.827743053 0.3041225 0.52309474947909951 0 diff --git a/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-L1-CV-breast-cancer.txt b/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-L1-CV-breast-cancer.txt index 0302f143a8..4462030630 100644 --- a/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-L1-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-L1-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 5.83416653 0.997082651 0.0042149967260249437 1 35 0 -5.20735168 0.005446332 0.0078788712300762211 0 37 0 -2.71164179 0.0622898862 0.092786101770516077 0 -40 0 +40 0 ? ? ? 0 41 1 1.76036549 0.853255451 0.22895036937850363 1 44 1 7.40955639 0.999394953 0.00087316212305162816 1 45 0 -5.421812 0.004399689 0.0063614137834035649 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -4.322978 0.0130867995 0.019004890409612719 0 141 0 -5.4411335 0.00431586 0.0062399450406013428 0 144 0 -5.20735168 0.005446332 0.0078788712300762211 0 -145 0 +145 0 ? ? ? 0 147 0 -4.875398 0.00757424766 0.010968922848962245 0 150 0 -5.39204836 0.00453200564 0.006553162786217457 0 151 1 4.061223 0.9830638 0.024643020624724253 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -5.73325 0.00322609954 0.0046618015694471927 0 156 0 -4.91463 0.007284973 0.010548463613035559 0 161 0 -4.12887526 0.0158458445 0.023043781432081138 0 -164 0 +164 0 ? ? ? 0 167 1 4.796111 0.9918059 0.011870271192039138 1 169 0 -5.68914 0.00337109854 0.0048716832359580091 0 171 0 -5.265687 0.005139294 0.0074335520922891698 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 10.8528 0.9999806 2.7947451458241805E-05 1 247 1 3.03624249 0.95418483 0.06765934462197544 1 248 0 -3.08702755 0.04364554 0.064382659498109923 0 -249 0 +249 0 ? ? ? 
0 250 0 -5.14841127 0.00577508053 0.0083558311280958943 0 252 0 3.84936237 0.979150653 5.5838539825714451 1 254 1 7.35057163 0.9993582 0.00092625186884288718 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -5.265687 0.005139294 0.0074335520922891698 0 271 0 -4.097221 0.016347127 0.023778810630558714 0 272 1 2.79984665 0.942667544 0.085179038039461821 1 -275 0 +275 0 ? ? ? 0 276 0 -4.915235 0.007280598 0.010542105680653375 0 277 0 -5.49946833 0.00407229364 0.0058870729354622087 0 278 0 -5.265687 0.005139294 0.0074335520922891698 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -5.265687 0.005139294 0.0074335520922891698 0 293 1 4.736758 0.991309166 0.012593024732102163 1 296 0 1.75365162 0.85241276 2.760360102190484 1 -297 0 +297 0 ? ? ? 0 299 1 5.53140068 0.9960552 0.0057024187918531082 1 300 1 6.04150867 0.9976277 0.0034266068641119179 1 301 0 -5.265687 0.005139294 0.0074335520922891698 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 4.025776 0.9824635 0.025524315591638584 1 317 1 8.423935 0.9997805 0.00031674081892699212 1 319 0 1.82892418 0.861633539 2.8534338101579171 1 -321 0 +321 0 ? ? ? 0 323 1 4.504982 0.9890671 0.015859728309184334 1 327 0 -5.49946833 0.00407229364 0.0058870729354622087 0 328 1 4.454892 0.9885119 0.016669732083571809 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 13.2412815 0.9999982 2.5797420694119618E-06 1 613 0 -4.637956 0.009584702 0.013894495676017315 0 614 0 -5.333713 0.00480293762 0.0069458679549985972 0 -617 0 +617 0 ? ? ? 
0 618 0 -4.623119 0.009726578 0.014101176192227349 0 619 0 -4.331002 0.0129835671 0.018853990456118342 0 621 0 -0.969069 0.2750661 0.46407865469425469 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -4.77261448 0.00838729553 0.012151339639068351 0 19 0 -4.05630064 0.01701831 0.02476355221691592 0 22 0 -5.26293468 0.00515338546 0.0074539868149001397 0 -23 1 +23 1 ? ? ? 0 24 0 -5.847086 0.00287998538 0.0041609352152631573 0 26 0 -5.01217842 0.00661237258 0.0095713166824383505 0 27 0 -4.54662037 0.0104917344 0.015216336343473301 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -5.25242233 0.005207562 0.0075325538981779582 0 135 0 -3.511651 0.028982535 0.042430850287558594 0 136 0 -4.90477753 0.0073565715 0.010652520453755514 0 -139 0 +139 0 ? ? ? 0 140 0 -5.39509726 0.00451827142 0.0065332584338255633 0 142 1 2.2592926 0.9054491 0.14329456494743992 1 143 0 -4.372905 0.0124574006 0.018085112102952432 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -4.16768646 0.01525183 0.022173264390610525 0 155 1 2.62865639 0.9326832 0.10054091831709649 1 157 0 -5.488929 0.004115263 0.0059493194428234175 0 -158 0 +158 0 ? ? ? 0 159 1 8.939568 0.9998689 0.00018910731810054123 1 160 1 6.86695 0.9989594 0.0015020181784367139 1 162 0 -5.13077164 0.00587725034 0.0085040948177142958 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.89666176 0.9972589 0.0039599989302112676 1 232 0 -0.179828167 0.455163717 0.87610531376236689 0 234 0 -3.15223122 0.04100345 0.060402470875188088 0 -235 0 +235 0 ? ? ? 
0 236 1 7.74873161 0.9995689 0.00062210936850165717 1 238 1 9.156269 0.99989444 0.00015229867579129541 1 243 0 -3.844254 0.0209539 0.030551300678084092 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 10.8717079 0.999981 2.7431493603031304E-05 1 287 0 -5.25242233 0.005207562 0.0075325538981779582 0 289 1 6.0516243 0.9976515 0.0033921289521523556 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 4.83148575 0.992088437 0.011459363337312383 1 298 0 -3.31150723 0.0351785272 0.051666079123835182 0 302 1 10.6722527 0.9999768 3.3451013395372324E-05 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -5.847086 0.00287998538 0.0041609352152631573 0 310 0 -5.61057949 0.00364560937 0.005269112963217676 0 313 0 -6.111411 0.00221251347 0.0031955185815049345 0 -315 0 +315 0 ? ? ? 0 318 0 -5.81555 0.002971982 0.0042940476081927553 0 320 1 3.90395832 0.98023653 0.028798182473169599 1 322 0 -5.13077164 0.00587725034 0.0085040948177142958 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -5.97924852 0.002524339 0.0036464555896367997 0 408 0 -4.06157875 0.01693024 0.024634297974768089 0 410 0 -5.97924852 0.002524339 0.0036464555896367997 0 -411 0 +411 0 ? ? ? 
0 412 1 6.142968 0.9978561 0.003096342326011223 1 417 0 -5.97924852 0.002524339 0.0036464555896367997 0 420 0 -3.33492 0.0343924649 0.050491160716171948 0 diff --git a/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-L1-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-L1-TrainTest-breast-cancer.txt index 22409b2a5a..a0b287a08e 100644 --- a/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-L1-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-L1-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 4.078068 0.983342052 0.024234755458090462 1 21 1 5.07943726 0.993815064 0.0089506841986991444 1 22 0 -6.401946 0.00165558141 0.0023904784483303873 0 -23 1 +23 1 ? ? ? 0 24 0 -6.95376 0.000954126066 0.0013771700453364219 0 25 1 0.131910324 0.532929838 0.90798248584253316 1 26 0 -6.20803738 0.00200913986 0.0029014918436988269 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -3.97515774 0.018430287 0.026837360561959296 0 38 1 3.85910988 0.9793487 0.030105439588229164 1 39 1 0.289131165 0.5717834 0.806459299491597 1 -40 0 +40 0 ? ? ? 0 41 1 0.5163593 0.626296043 0.67508333009285992 1 42 1 6.25112057 0.9980754 0.0027792492014372186 1 43 1 -1.67306614 0.158015817 2.6618591152960773 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -6.01794958 0.00242874329 0.0035081978824050039 0 137 0 -6.618124 0.00133415242 0.0019260601909807822 0 138 0 -5.72209167 0.0032621813 0.0047140259759722502 0 -139 0 +139 0 ? ? ? 0 140 0 -6.618124 0.00133415242 0.0019260601909807822 0 141 0 -7.00212 0.00090912357 0.0013121846250039875 0 142 1 1.86599445 0.8659941 0.20757090594664257 1 143 0 -5.405919 0.00446985662 0.0064630953185838089 0 144 0 -6.785942 0.00112826866 0.001628666561573529 0 -145 0 +145 0 ? ? ? 
0 146 1 -0.2344532 0.441653728 1.17901240396708 0 147 0 -6.632551 0.00131506776 0.0018984903241446914 0 148 0 -3.505578 0.0291539337 0.042685529480640867 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.2497673 0.9046305 0.14459948541676573 1 156 0 -6.63783455 0.0013081471 0.0018884928122825894 0 157 0 -6.569764 0.00140016491 0.0020214264686188826 0 -158 0 +158 0 ? ? ? 0 159 1 9.262019 0.99990505 0.00013699068516156971 1 160 1 6.81782627 0.9989071 0.001577599187795759 1 161 0 -5.57644749 0.00377171184 0.005451717595353108 0 162 0 -6.18576765 0.002054292 0.0029667652410511319 0 163 0 -5.441502 0.00431427639 0.0062376503223464629 0 -164 0 +164 0 ? ? ? 0 165 0 -5.081398 0.00617287867 0.0089331816190877534 0 166 1 5.99673557 0.9975193 0.0035833193575627669 1 167 1 4.04529858 0.982796669 0.025035126906624509 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 -0.9520569 0.278471351 0.47087141475726779 0 233 1 4.02608871 0.982468843 0.025516438252352936 1 234 0 -4.248459 0.0140850125 0.020464842181276369 0 -235 0 +235 0 ? ? ? 0 236 1 8.120999 0.9997029 0.00042873045458892818 1 237 1 3.97880173 0.9816355 0.02674065507190734 1 238 1 9.321572 0.999910533 0.00012907875367543245 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 9.005473 0.9998773 0.00017706700464490148 1 247 1 1.74156952 0.8508863 0.23296175567699981 1 248 0 -4.675151 0.009237982 0.013389533098992946 0 -249 0 +249 0 ? ? ? 0 250 0 -6.85401249 0.00105410081 0.0015215480827833656 0 251 1 5.57425 0.996220052 0.0054636445480688526 1 252 0 2.525447 0.9259066 3.7545111242363967 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.04092026 0.73902756 0.43629992864352879 1 273 1 -1.54782867 0.1754001 2.5112785805368674 0 274 0 -5.9604435 0.002572135 0.0037155870547972859 0 -275 0 +275 0 ? ? ? 
0 276 0 -6.401946 0.00165558141 0.0023904784483303873 0 277 0 -7.16993856 0.0007687785 0.0011095394740419171 0 278 0 -6.95376 0.000954126066 0.0013771700453364219 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 5.963811 0.997436464 0.0037031495249571483 1 290 0 -7.386117 0.0006194141 0.0008939025092919693 0 291 0 -6.95376 0.000954126066 0.0013771700453364219 0 -292 1 +292 1 ? ? ? 0 293 1 3.82977867 0.97874707 0.030992011604895862 1 -294 0 +294 0 ? ? ? 0 295 1 4.92571831 0.992794752 0.010432605435470237 1 296 0 -0.00817585 0.497956038 0.9941143927175089 0 -297 0 +297 0 ? ? ? 0 298 0 -4.33488274 0.0129339322 0.018781442391253584 0 299 1 4.3436203 0.987177134 0.018619117824464626 1 300 1 4.40119934 0.987885952 0.017583597598896144 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.9174366 0.7145195 0.48495470886222203 1 313 0 -7.386117 0.0006194141 0.0008939025092919693 0 314 0 -7.21227646 0.0007369329 0.001063561390310713 0 -315 0 +315 0 ? ? ? 0 316 1 2.16819382 0.897356749 0.15624644520733796 1 317 1 6.37115574 0.998292744 0.0024651544582865699 1 318 0 -6.826747 0.00108320534 0.0015635819620021878 0 319 0 -0.115743637 0.471096337 0.91892312744499838 0 320 1 4.11173058 0.9838846 0.023439022974424555 1 -321 0 +321 0 ? ? ? 0 322 0 -6.18576765 0.002054292 0.0029667652410511319 0 323 1 3.31629562 0.964983642 0.051423608112212277 1 324 0 -6.95376 0.000954126066 0.0013771700453364219 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -5.098096 0.00607128069 0.0087857038468610767 0 409 0 -6.106088 0.00222429563 0.0032125544404639546 0 410 0 -7.16993856 0.0007687785 0.0011095394740419171 0 -411 0 +411 0 ? ? ? 
0 412 1 6.053849 0.9976567 0.0033846301153950954 1 413 0 -5.12191772 0.00592920836 0.0085794995279385791 0 414 1 4.639694 0.9904318 0.013870479681066349 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -6.82828045 0.00108154735 0.0015611874005433336 0 615 0 -5.42623472 0.00438035838 0.0063334027525860032 0 616 0 -6.401946 0.00165558141 0.0023904784483303873 0 -617 0 +617 0 ? ? ? 0 618 0 -6.01794958 0.00242874329 0.0035081978824050039 0 619 0 -5.633953 0.00356168649 0.0051475999586587254 0 620 0 -6.401946 0.00165558141 0.0023904784483303873 0 diff --git a/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-SmoothedHinge-CV-breast-cancer.txt b/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-SmoothedHinge-CV-breast-cancer.txt index 5197b73592..ffa3b3b912 100644 --- a/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-SmoothedHinge-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-SmoothedHinge-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 2.322781 0.998222351 0.0025668874786068312 1 35 0 -1.91525483 0.005915621 0.0085597800780180429 0 37 0 -0.682821751 0.142692313 0.22211501491184898 0 -40 0 +40 0 ? ? ? 0 41 1 0.809957266 0.9039319 0.14571398208584879 1 44 1 2.73944569 0.999422848 0.00083289438051421241 1 45 0 -1.94671428 0.0054360223 0.0078639160441893435 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -1.53363073 0.0164193157 0.023884691545080352 0 141 0 -2.06519866 0.00395226665 0.0057132130161921821 0 144 0 -1.91525483 0.005915621 0.0085597800780180429 0 -145 0 +145 0 ? ? ? 
0 147 0 -1.84313476 0.007179688 0.010395463465996112 0 150 0 -1.97708547 0.005009752 0.0072457094989476683 0 151 1 1.45099306 0.9815549 0.026859095208834581 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -2.1974268 0.00276787742 0.0039987395951823247 0 156 0 -1.89696681 0.006213517 0.0089921755974289187 0 161 0 -1.42502642 0.0218984671 0.031943861389482818 0 -164 0 +164 0 ? ? ? 0 167 1 2.354064 0.998366237 0.0023589497379793915 1 169 0 -2.21602869 0.00263251015 0.0038029171463890205 0 171 0 -1.897539 0.006203974 0.008978322239894131 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 3.30174255 0.9998737 0.00018222712667875736 1 247 1 0.629675865 0.8525092 0.23021269060466767 1 248 0 -1.12171221 0.0483664 0.07152188601007381 0 -249 0 +249 0 ? ? ? 0 250 0 -2.04691076 0.00415170472 0.0060021119500057712 0 252 0 1.01602411 0.942604 4.12290612676055 1 254 1 2.17657 0.997363031 0.0038093670302838936 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -1.897539 0.006203974 0.008978322239894131 0 271 0 -1.368627 0.0254129283 0.037137010195002478 0 272 1 0.765766859 0.8930476 0.16319106749073262 1 -275 0 +275 0 ? ? ? 0 276 0 -1.78302681 0.008435549 0.012221545522817861 0 277 0 -2.047483 0.00414531538 0.0059928556832883097 0 278 0 -1.897539 0.006203974 0.008978322239894131 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -1.897539 0.006203974 0.008978322239894131 0 293 1 1.17371225 0.961760044 0.056251103565820429 1 296 0 0.4346769 0.773356 2.1415002561411738 1 -297 0 +297 0 ? ? ? 
0 299 1 2.019999 0.9959794 0.0058121507954068162 1 300 1 2.2421217 0.9977903 0.0031914838554231535 1 301 0 -1.897539 0.006203974 0.008978322239894131 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 0.9578371 0.9334789 0.099310694790810672 1 317 1 2.6748023 0.999312758 0.00099182083151890563 1 319 0 0.702352047 0.87553966 3.0062420076587428 1 -321 0 +321 0 ? ? ? 0 323 1 1.138917 0.9581469 0.061681187824867326 1 327 0 -2.047483 0.00414531538 0.0059928556832883097 0 328 1 0.998517036 0.9399896 0.089283259387362562 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 5.011583 0.999998748 1.8058189642293044E-06 1 613 0 -1.848559 0.00707593327 0.010244702155254935 0 614 0 -1.9948014 0.0047766366 0.006907740990800825 0 -617 0 +617 0 ? ? ? 0 618 0 -1.6507988 0.0120159388 0.017440327322179375 0 619 0 -1.51857078 0.017089799 0.024868477463820319 0 621 0 -0.159297943 0.406589121 0.75289671799437707 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -2.01459932 0.02046752 0.029834763390847389 0 19 0 -1.722561 0.0706015 0.10563077935630806 0 22 0 -2.18885 0.009580599 0.013888519789490289 0 -23 1 +23 1 ? ? ? 0 24 0 -2.45265675 0.00300532184 0.0043422911736604322 0 26 0 -2.15108967 0.0113010341 0.016396771634848997 0 27 0 -1.8968116 0.0339725837 0.049863960916695586 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -2.18885 0.009580599 0.013888519789490289 0 135 0 -1.67432666 0.0859358 0.12963259925365803 0 136 0 -2.04283071 0.0181100331 0.026366733420336564 0 -139 0 +139 0 ? ? ? 
0 140 0 -2.21708131 0.00846625 0.012266215273071623 0 142 1 -0.39607358 0.963921666 0.053012185419208942 0 143 0 -2.12454414 0.0126899984 0.018424952871546458 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -1.87877429 0.03668837 0.053925509586129468 0 155 1 -0.6914537 0.878659666 0.18662362559522522 0 157 0 -2.30663776 0.005714676 0.0082681821048535484 0 -158 0 +158 0 ? ? ? 0 159 1 0.826603651 0.9998317 0.00024285994311160547 1 160 1 0.465637445 0.999170542 0.0011971518470987805 1 162 0 -2.16061854 0.0108400183 0.015724220850716938 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 -0.0580048561 0.9916699 0.012068138193505744 0 232 0 -1.12658465 0.5141482 1.0414117089703063 0 234 0 -1.83982372 0.04328234 0.063834867527523209 0 -235 0 +235 0 ? ? ? 0 236 1 0.979896069 0.999914467 0.00012340282955575714 1 238 1 0.930060863 0.9998934 0.00015376068339360816 1 243 0 -2.01161838 0.02073334 0.030226326896041298 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 1.41079044 0.999987245 1.8402261006614678E-05 1 287 0 -2.18885 0.009580599 0.013888519789490289 0 289 1 0.0753440857 0.9953623 0.0067063762389963991 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 -0.367047548 0.9681271 0.046731673516829733 0 298 0 -1.44922507 0.202717274 0.32683668378040626 0 302 1 1.39901066 0.9999866 1.9348177961999343E-05 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -2.45265675 0.00300532184 0.0043422911736604322 0 310 0 -2.33486915 0.00504769245 0.0073007223089671565 0 313 0 -2.50911975 0.002343144 0.0033844089083832525 0 -315 0 +315 0 ? ? ? 
0 318 0 -2.45265675 0.00300532184 0.0043422911736604322 0 320 1 0.09479761 0.995742738 0.006155041546569217 1 322 0 -2.16061854 0.0108400183 0.015724220850716938 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -2.48088837 0.00265371148 0.0038335852528692412 0 408 0 -2.10281515 0.0139513118 0.020269210492385212 0 410 0 -2.48088837 0.00265371148 0.0038335852528692412 0 -411 0 +411 0 ? ? ? 0 412 1 -0.0120434761 0.9931908 0.0098571608082237746 0 417 0 -2.48088837 0.00265371148 0.0038335852528692412 0 420 0 -1.86625433 0.0386949927 0.056933846240527428 0 diff --git a/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-SmoothedHinge-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-SmoothedHinge-TrainTest-breast-cancer.txt index 1c905b24c8..b2ca41a554 100644 --- a/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-SmoothedHinge-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-SmoothedHinge-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 1.51313376 0.997060657 0.0042468207167790329 1 21 1 1.88215756 0.9991858 0.0011751199613487565 1 22 0 -1.5913856 0.006754517 0.0097777676588594702 0 -23 1 +23 1 ? ? ? 0 24 0 -1.85515833 0.00270523666 0.0039081201011675363 0 25 1 0.183981657 0.767693 0.38139863321618261 1 26 0 -1.596403 0.006638234 0.0096088756803218642 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -0.48321867 0.244259775 0.40403768061841711 0 38 1 1.01125383 0.983338952 0.024239302762828055 1 39 1 -0.0106446743 0.62648654 0.67464458153125895 0 -40 0 +40 0 ? ? ? 
0 41 1 0.8607874 0.9721748 0.040712322202366577 1 42 1 1.4849956 0.9967589 0.004683544378464086 1 43 1 -0.7279444 0.121085733 3.0458992041721875 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -1.404305 0.0128827207 0.018706593545987932 0 137 0 -1.701774 0.00460771844 0.0066628947272309403 0 138 0 -1.360993 0.0149499336 0.021731041715098105 0 -139 0 +139 0 ? ? ? 0 140 0 -1.701774 0.00460771844 0.0066628947272309403 0 141 0 -1.8888545 0.00240627048 0.0034756978965475691 0 142 1 0.881847143 0.974092543 0.037869253621396266 1 143 0 -1.545697 0.007910921 0.01145843020471164 0 144 0 -1.77846622 0.00353102176 0.0051032026604077695 0 -145 0 +145 0 ? ? ? 0 146 1 -0.216273546 0.450334132 1.1509322683430081 0 147 0 -1.82796979 0.00297325244 0.0042958860993499966 0 148 0 -0.7305672 0.12011648 0.18461554363807445 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 0.429970026 0.886195242 0.17430351324204957 1 156 0 -1.92223477 0.0021426254 0.0030944713637373776 0 157 0 -1.66807771 0.005178785 0.0074908208279084272 0 -158 0 +158 0 ? ? ? 0 159 1 2.79265332 0.999965847 4.9273870913609948E-05 1 160 1 2.08471417 0.999597847 0.00058030016510027748 1 161 0 -1.23200977 0.0232356917 0.033917610773834123 0 162 0 -1.48099709 0.009891603 0.014341615100451873 0 163 0 -1.20272636 0.02566766 0.037514142314967996 0 -164 0 +164 0 ? ? ? 0 165 0 -1.09945023 0.0363804549 0.053464438820388052 0 166 1 1.35837817 0.99497056 0.0072742561293771086 1 167 1 1.40636182 0.9957416 0.0061566823680882154 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 -0.0236132145 0.6158536 1.3802718900263455 0 233 1 1.45269513 0.9963742 0.005240444497289571 1 234 0 -1.12313449 0.0335955 0.049300920379201259 0 -235 0 +235 0 ? ? ? 
0 236 1 2.801736 0.9999669 4.7725975020197848E-05 1 237 1 1.18725443 0.9909072 0.013178153095375312 1 238 1 2.58363461 0.999929249 0.00010207531428030829 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 2.15890861 0.9996894 0.00044817040124598361 1 247 1 0.7686317 0.9620387 0.055833170607592193 1 248 0 -0.9843831 0.0533667132 0.079122441964183776 0 -249 0 +249 0 ? ? ? 0 250 0 -2.03262329 0.001459477 0.0021071182550953969 0 251 1 1.98737311 0.999435544 0.0008145677672951316 1 252 0 0.938287735 0.9786189 5.5475214513768956 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 0.364958525 0.861275733 0.21545291272160746 1 273 1 -0.0762293339 0.5716655 0.80675680440858633 0 274 0 -1.41909039 0.0122437514 0.017773027141160865 0 -275 0 +275 0 ? ? ? 0 276 0 -1.5913856 0.006754517 0.0097777676588594702 0 277 0 -1.96554685 0.00184303813 0.0026613952529717312 0 278 0 -1.85515833 0.00270523666 0.0039081201011675363 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 1.51208711 0.99705 0.0042622586236973218 1 290 0 -2.07593513 0.00125529012 0.0018121384446850468 0 291 0 -1.85515833 0.00270523666 0.0039081201011675363 0 -292 1 +292 1 ? ? ? 0 293 1 1.33046842 0.9944597 0.0080152036111407286 1 -294 0 +294 0 ? ? ? 0 295 1 1.2251215 0.9920222 0.011555664783429847 1 296 0 0.111403227 0.719591737 1.8343992308763424 1 -297 0 +297 0 ? ? ? 0 298 0 -0.7513188 0.112681925 0.17247673834356275 0 299 1 1.96303582 0.9993856 0.00088667099785278705 1 300 1 1.383044 0.995382845 0.0066765713113142704 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.17001605 0.7589018 0.39801492605707506 1 313 0 -2.07593513 0.00125529012 0.0018121384446850468 0 314 0 -1.97056413 0.00181115558 0.0026153142649758387 0 -315 0 +315 0 ? ? ? 
0 316 1 0.541802645 0.9199835 0.12032009864475471 1 317 1 1.56700492 0.9975625 0.0035208219605770688 1 318 0 -1.84010613 0.00285049225 0.0041182633675105971 0 319 0 0.05099702 0.675234735 1.6225307564202673 1 320 1 1.7057507 0.9984955 0.0021721414278146082 1 -321 0 +321 0 ? ? ? 0 322 0 -1.48099709 0.009891603 0.014341615100451873 0 323 1 0.7110567 0.95399344 0.067948749601842412 1 324 0 -1.85515833 0.00270523666 0.0039081201011675363 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -1.45910537 0.0106672905 0.015472318523715401 0 409 0 -1.54807365 0.007846191 0.011364302031181555 0 410 0 -1.96554685 0.00184303813 0.0026613952529717312 0 -411 0 +411 0 ? ? ? 0 412 1 1.61123109 0.9979099 0.0030185274243607983 1 413 0 -1.063524 0.0410328656 0.060446722723610943 0 414 1 1.54281878 0.9973487 0.0038300596622238277 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -1.78348351 0.00347004039 0.0050149160869795298 0 615 0 -1.31768119 0.0173430257 0.025240206070453266 0 616 0 -1.5913856 0.006754517 0.0097777676588594702 0 -617 0 +617 0 ? ? ? 0 618 0 -1.404305 0.0128827207 0.018706593545987932 0 619 0 -1.21722436 0.024434112 0.035688781701501276 0 620 0 -1.5913856 0.006754517 0.0097777676588594702 0 diff --git a/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-TrainTest-breast-cancer.txt index ff963fe7a2..9b8d818944 100644 --- a/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/SDCA/BinarySDCA-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 1.37743354 0.7985785 0.32449386248842338 1 21 1 1.7630446 0.853590548 0.22838389385799779 1 22 0 -1.874893 0.132976577 0.20585712538187464 0 -23 1 +23 1 ? ? ? 
0 24 0 -1.948468 0.124720506 0.19218432401279964 0 25 1 0.243255854 0.5605158 0.83517300013076445 1 26 0 -1.79869926 0.142009482 0.22096639052344227 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.30918431 0.212623373 0.34487420705553562 0 38 1 1.52097869 0.8206825 0.28510385981645775 1 39 1 0.568297148 0.638370156 0.64753488753370236 1 -40 0 +40 0 ? ? ? 0 41 1 0.08091617 0.520218 0.94281173541226504 1 42 1 2.4137857 0.9178725 0.12363434755323521 1 43 1 -0.08661461 0.478359878 1.0638317048217665 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -1.80893993 0.1407663 0.21887750444885776 0 137 0 -1.9332242 0.126394138 0.1949455575113978 0 138 0 -1.70428443 0.153906524 0.24111103479242962 0 -139 0 +139 0 ? ? ? 0 140 0 -1.9332242 0.126394138 0.1949455575113978 0 141 0 -1.99917722 0.119289339 0.18325996439997919 0 142 1 0.6176696 0.649688363 0.62218023037407078 1 143 0 -1.44733071 0.190412715 0.3047414631100317 0 144 0 -1.94084609 0.125554934 0.19356034047915699 0 -145 0 +145 0 ? ? ? 0 146 1 0.272925377 0.567810953 0.81651741701068026 1 147 0 -1.8198235 0.13945505 0.21667754281184065 0 148 0 -0.713562 0.328812242 0.57521169167023078 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 0.9670353 0.7245282 0.46488626460347809 1 156 0 -1.793424 0.142653435 0.22204959316054371 0 157 0 -1.882515 0.132100269 0.20439971838260004 0 -158 0 +158 0 ? ? ? 0 159 1 3.20693851 0.961094558 0.057249715926430703 1 160 1 2.3220613 0.910687745 0.13497162539860094 1 161 0 -1.65904641 0.159890041 0.25134992500211428 0 162 0 -1.81656182 0.139846936 0.21733468482291701 0 163 0 -1.60762358 0.166918814 0.26347099825296155 0 -164 0 +164 0 ? ? ? 
0 165 0 -1.50459647 0.181740969 0.28937047546414252 0 166 1 2.457339 0.9210965 0.11857577878171396 1 167 1 1.31327176 0.7880601 0.34362238392184646 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 -0.213132143 0.446917742 0.85443403273590568 0 233 1 1.23634028 0.774926364 0.36786886673349695 1 234 0 -1.00412917 0.268130362 0.4503413991221501 0 -235 0 +235 0 ? ? ? 0 236 1 2.65734458 0.934462249 0.097791712220334726 1 237 1 1.59322524 0.8310694 0.26695912082718543 1 238 1 3.30821967 0.9647097 0.051833223594515407 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 3.32132936 0.965153337 0.051169929428424521 1 247 1 0.572052956 0.639236748 0.64557774708968529 1 248 0 -1.29279661 0.215379834 0.34993367973529765 0 -249 0 +249 0 ? ? ? 0 250 0 -1.85175514 0.135666952 0.21034077001927648 0 251 1 1.686516 0.843765438 0.24508610192596253 1 252 0 0.9327009 0.717622936 1.8243051828654713 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 0.5505221 0.6342567 0.65686119381445462 1 273 1 -0.451063156 0.389108032 1.3617573340834921 0 274 0 -1.72499943 0.151228324 0.23655158068052121 0 -275 0 +275 0 ? ? ? 0 276 0 -1.874893 0.132976577 0.20585712538187464 0 277 0 -2.00679922 0.1184909 0.18195262731195211 0 278 0 -1.948468 0.124720506 0.19218432401279964 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 2.363632 0.914011657 0.12971552945936157 1 290 0 -2.06513023 0.112532459 0.17223374110934919 0 291 0 -1.948468 0.124720506 0.19218432401279964 0 -292 1 +292 1 ? ? ? 0 293 1 1.17569327 0.764172554 0.38802965202562639 1 -294 0 +294 0 ? ? ? 0 295 1 1.9321816 0.8734907 0.1951357665052601 1 296 0 0.2875533 0.571397066 1.222286369266439 1 -297 0 +297 0 ? ? ? 
0 298 0 -1.39716244 0.198266774 0.31880583096618414 0 299 1 1.31489539 0.788331151 0.34312631094095036 1 300 1 1.6675427 0.8412479 0.24939706872810494 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.6733315 0.662248731 0.59455492152164024 1 313 0 -2.06513023 0.112532459 0.17223374110934919 0 314 0 -2.03932476 0.115135506 0.17647155325903272 0 -315 0 +315 0 ? ? ? 0 316 1 0.9730499 0.725726962 0.46250122508222125 1 317 1 2.53153682 0.9263233 0.11041230078378952 1 318 0 -1.85089087 0.135768339 0.21051001013383977 0 319 0 0.279238 0.5693594 1.2154438271457824 1 320 1 1.35926127 0.795639634 0.32981295026166818 1 -321 0 +321 0 ? ? ? 0 322 0 -1.81656182 0.139846936 0.21733468482291701 0 323 1 1.28926969 0.7840235 0.35103115421599534 1 324 0 -1.948468 0.124720506 0.19218432401279964 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -1.33084726 0.209019259 0.33828552617088203 0 409 0 -1.77023745 0.1455128 0.22686922143757757 0 410 0 -2.00679922 0.1184909 0.18195262731195211 0 -411 0 +411 0 ? ? ? 0 412 1 2.25751042 0.9052964 0.14353790094246915 1 413 0 -1.58000016 0.170795456 0.27020007178125394 0 414 1 1.60186315 0.8322786 0.2648615365498318 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -1.97337174 0.122027189 0.18775183095640005 0 615 0 -1.59962881 0.1680335 0.26540264924934215 0 616 0 -1.874893 0.132976577 0.20585712538187464 0 -617 0 +617 0 ? ? ? 
0 618 0 -1.80893993 0.1407663 0.21887750444885776 0 619 0 -1.74298692 0.148933932 0.23265696296547442 0 620 0 -1.874893 0.132976577 0.20585712538187464 0 diff --git a/test/BaselineOutput/SingleDebug/SGD/BinarySGD-CV-breast-cancer.txt b/test/BaselineOutput/SingleDebug/SGD/BinarySGD-CV-breast-cancer.txt index 8b3d215017..bee6e20c10 100644 --- a/test/BaselineOutput/SingleDebug/SGD/BinarySGD-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/SGD/BinarySGD-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 0.344691515 0.5853297 0.77267858291118818 1 35 0 -0.6512495 0.342708021 0.60539371596619707 0 37 0 -0.503962 0.376610041 0.68179317658398975 0 -40 0 +40 0 ? ? ? 0 41 1 -0.09442055 0.4764124 1.069717174215209 0 44 1 0.5798913 0.6410424 0.64150828626867074 1 45 0 -0.6560674 0.341623574 0.60301541680763648 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -0.597589 0.354895473 0.63239515287919279 0 141 0 -0.6678773 0.3389723 0.5972173671855312 0 144 0 -0.6512495 0.342708021 0.60539371596619707 0 -145 0 +145 0 ? ? ? 0 147 0 -0.6018609 0.353918076 0.63021098184538349 0 150 0 -0.656989932 0.3414161 0.60256083089734203 0 151 1 0.160812974 0.540116847 0.88865654739065503 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -0.67789495 0.336731285 0.59233461600767046 0 156 0 -0.5912131 0.356356561 0.63566639957530646 0 161 0 -0.5840918 0.3579916 0.63933593475596628 0 -164 0 +164 0 ? ? ? 0 167 1 0.315922141 0.5783301 0.79003490476906479 1 169 0 -0.6661818 0.33935234 0.59804704249396301 0 171 0 -0.644639254 0.344198585 0.60866907932442826 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 0.9649427 0.7241103 0.46571861194179465 1 247 1 0.07307994 0.51826185 0.94824689644610127 1 248 0 -0.4621131 0.386484653 0.70482866116462817 0 -249 0 +249 0 ? ? ? 
0 250 0 -0.607840955 0.352551848 0.62716342858897578 0 252 0 0.210040927 0.552318037 1.1594538998856256 1 254 1 0.5551709 0.635334432 0.65441188637353787 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -0.644639254 0.344198585 0.60866907932442826 0 271 0 -0.6045689 0.3532991 0.62882950155653372 0 272 1 0.109180689 0.5272681 0.92339134731632821 1 -275 0 +275 0 ? ? ? 0 276 0 -0.6412319 0.3449681 0.61036295013192154 0 277 0 -0.6612671 0.340455025 0.60045705425766605 0 278 0 -0.644639254 0.344198585 0.60866907932442826 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -0.644639254 0.344198585 0.60866907932442826 0 293 1 0.2438618 0.560665131 0.83478874766701627 1 296 0 0.0298114419 0.5074523 1.0216646747888918 1 -297 0 +297 0 ? ? ? 0 299 1 0.29232645 0.5725656 0.80448706147450022 1 300 1 0.440484524 0.6083745 0.71696846696669991 1 301 0 -0.644639254 0.344198585 0.60866907932442826 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 0.23788023 0.5591912 0.83858636815416177 1 317 1 0.736536 0.676237941 0.56439713300164063 1 319 0 0.032892406 0.508222342 1.0239219003304794 1 -321 0 +321 0 ? ? ? 0 323 1 0.3295461 0.581648946 0.78177941761076775 1 327 0 -0.6612671 0.340455025 0.60045705425766605 0 328 1 0.309677362 0.5768065 0.79384070845234189 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 1.18309152 0.7655032 0.38551963408922602 1 613 0 -0.578392267 0.3593026 0.64228498181768678 0 614 0 -0.663600147 0.339931339 0.59931199209014285 0 -617 0 +617 0 ? ? ? 0 618 0 -0.6312144 0.347235233 0.61536490408676614 0 619 0 -0.621196747 0.349509329 0.62039972730149728 0 621 0 -0.2970261 0.426284641 0.80159295389429652 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -0.5587295 0.363841474 0.65254177576954708 0 19 0 -0.521061659 0.372604 0.67255179238253271 0 22 0 -0.597002566 0.355029762 0.63269550517202178 0 -23 1 +23 1 ? ? ? 
0 24 0 -0.6152313 0.3508668 0.62341353797116505 0 26 0 -0.568058968 0.361684829 0.64765915777643768 0 27 0 -0.559334755 0.3637014 0.6522241546694586 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -0.586384356 0.35746488 0.6381527823333405 0 135 0 -0.4233432 0.395717025 0.72670379813371644 0 136 0 -0.578168631 0.3593541 0.6424009484072789 0 -139 0 +139 0 ? ? ? 0 140 0 -0.6164417 0.350591153 0.62280105585760936 0 142 1 0.253997982 0.5631603 0.82838245949919198 1 143 0 -0.455442041 0.388067663 0.70855595514156899 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -0.498274028 0.377946377 0.68488914380895782 0 155 1 0.381782949 0.5943031 0.75072925773026189 1 157 0 -0.5963973 0.355168372 0.63300558819075581 0 -158 0 +158 0 ? ? ? 0 159 1 1.14907432 0.7593418 0.3971787124185161 1 160 1 0.835970342 0.697615862 0.51949525142708575 1 162 0 -0.5775634 0.359493434 0.64271473576785443 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 0.867232 0.7041694 0.50600557448041072 1 232 0 -0.0153304338 0.496167481 0.98898385339386186 0 234 0 -0.28234 0.4298802 0.81066299300396294 0 -235 0 +235 0 ? ? ? 0 236 1 0.9360302 0.718297064 0.47734747567952907 1 238 1 1.21385264 0.770979941 0.37523476983545384 1 243 0 -0.3776855 0.406685263 0.7531304771823496 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 1.42419529 0.8059953 0.31115669483110281 1 287 0 -0.586384356 0.35746488 0.6381527823333405 0 289 1 0.8870376 0.7082785 0.49761139182067243 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 0.7293541 0.674663544 0.56775988879752004 1 298 0 -0.4508557 0.389157325 0.711127239010238 0 302 1 1.38025045 0.7990312 0.3236762608744207 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -0.6152313 0.3508668 0.62341353797116505 0 310 0 -0.6052183 0.353150755 0.62849857907527806 0 313 0 -0.654109538 0.342064053 0.60398095649539585 0 -315 0 +315 0 ? ? ? 
0 318 0 -0.5833766 0.358156025 0.63970545866432549 0 320 1 0.483023465 0.618461549 0.69324419149565364 1 322 0 -0.5775634 0.359493434 0.64271473576785443 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -0.6346704 0.3464523 0.61363554879852178 0 408 0 -0.398791075 0.401602834 0.74082475334425091 0 410 0 -0.6346704 0.3464523 0.61363554879852178 0 -411 0 +411 0 ? ? ? 0 412 1 0.851157367 0.7008099 0.51290494780474771 1 417 0 -0.6346704 0.3464523 0.61363554879852178 0 420 0 -0.3393041 0.415978521 0.77590666598714197 0 diff --git a/test/BaselineOutput/SingleDebug/SGD/BinarySGD-Hinge-CV-breast-cancer.txt b/test/BaselineOutput/SingleDebug/SGD/BinarySGD-Hinge-CV-breast-cancer.txt index a2364522f6..ca9f631c71 100644 --- a/test/BaselineOutput/SingleDebug/SGD/BinarySGD-Hinge-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/SGD/BinarySGD-Hinge-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 0.6522908 0.9885379 0.01663180464505477 1 35 0 -1.12420082 0.0107460516 0.015587176400536187 0 37 0 -0.865502357 0.0386134759 0.056811513682664551 0 -40 0 +40 0 ? ? ? 0 41 1 -0.1372056 0.6145622 0.70236902985400396 0 44 1 1.0547893 0.9984863 0.0021854902279334833 1 45 0 -1.13070083 0.0104023358 0.015085999817509936 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -1.03150129 0.0170594156 0.024823881934527593 0 141 0 -1.15380061 0.00926659 0.0134311903289673 0 144 0 -1.12420082 0.0107460516 0.015587176400536187 0 -145 0 +145 0 ? ? ? 0 147 0 -1.03270125 0.0169580057 0.024675046951938132 0 150 0 -1.13440084 0.0102115627 0.0148079061901775 0 151 1 0.335192084 0.945543051 0.080784949020458824 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -1.17080045 0.008510069 0.012329973523626827 0 156 0 -1.01700115 0.0183329377 0.026694285275120175 0 161 0 -1.00710154 0.0192555338 0.028050804915513337 0 -164 0 +164 0 ? ? ? 
0 167 1 0.6017914 0.98525393 0.021432495728287648 1 169 0 -1.15050054 0.009420992 0.013656046492446974 0 171 0 -1.11160088 0.01144463 0.016606319988959024 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 1.7594862 0.999956965 6.208707291578748E-05 1 247 1 0.172593474 0.8841638 0.17761443218515088 1 248 0 -0.793102443 0.0547425 0.081220704071742325 0 -249 0 +249 0 ? ? ? 0 250 0 -1.04660094 0.0158256628 0.0230141968119793 0 252 0 0.423291922 0.964417338 4.8126817596940885 1 254 1 1.03148985 0.998297453 0.0024583495418655562 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -1.11160088 0.01144463 0.016606319988959024 0 271 0 -1.04360127 0.01606356 0.023362970756245272 0 272 1 0.243792772 0.9162424 0.12619873583154692 1 -275 0 +275 0 ? ? ? 0 276 0 -1.10720086 0.0116990069 0.016977604564195337 0 277 0 -1.14120078 0.009869945 0.014310057532988397 0 278 0 -1.11160088 0.01144463 0.016606319988959024 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -1.11160088 0.01144463 0.016606319988959024 0 293 1 0.478191853 0.97280544 0.039776797794310127 1 296 0 0.0886935 0.83318603 2.5836879844847194 1 -297 0 +297 0 ? ? ? 0 299 1 0.5653906 0.9823277 0.025723714124946184 1 300 1 0.8071904 0.994728446 0.0076253612609159995 1 301 0 -1.11160088 0.01144463 0.016606319988959024 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 0.472591877 0.9720465 0.040902773075660377 1 317 1 1.35288787 0.9996641 0.00048464251232972397 1 319 0 0.100693226 0.841446757 2.6569607115771978 1 -321 0 +321 0 ? ? ? 
0 323 1 0.6351912 0.987516046 0.018123905012295945 1 327 0 -1.14120078 0.009869945 0.014310057532988397 0 328 1 0.5871916 0.9841424 0.023060980928491851 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 2.158784 0.9999943 8.2551908597278378E-06 1 613 0 -0.993201256 0.0206282046 0.030071445887912274 0 614 0 -1.14700067 0.009587526 0.013898610301947204 0 -617 0 +617 0 ? ? ? 0 618 0 -1.090201 0.0127353743 0.018491259389232038 0 619 0 -1.07320118 0.0138622615 0.020138926273650697 0 621 0 -0.4947039 0.207426473 0.33538331559891793 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -1.02980137 0.0147205992 0.021395199721883466 0 19 0 -0.9630018 0.0192868058 0.028096807322037363 0 22 0 -1.0968008 0.011213962 0.016269722836470393 0 -23 1 +23 1 ? ? ? 0 24 0 -1.13000059 0.00979631 0.014202769680513335 0 26 0 -1.04700089 0.0137287723 0.019943648017493942 0 27 0 -1.03000128 0.0147086745 0.021377739156459911 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -1.079601 0.0120263351 0.017455508586451673 0 135 0 -0.7955023 0.03769685 0.055436642942084668 0 136 0 -1.0634011 0.0128445318 0.018650780679196597 0 -139 0 +139 0 ? ? ? 0 140 0 -1.13040054 0.009780362 0.014179534189705549 0 142 1 0.3573923 0.8180728 0.2896988683892544 1 143 0 -0.8376017 0.03189309 0.046761717990888088 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -0.917201638 0.0231931154 0.033854726268903304 0 155 1 0.5948919 0.922758937 0.1159742899072022 1 157 0 -1.09660089 0.0112230852 0.016283034217764617 0 -158 0 +158 0 ? ? ? 
0 159 1 1.88818586 0.9995909 0.00059036523296398255 1 160 1 1.36718786 0.996521652 0.0050269435856363917 1 162 0 -1.06320107 0.01285497 0.018666035936317264 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 1.39838827 0.9969394 0.0044222533640880836 1 232 0 -0.118705273 0.38808772 0.70860324232262772 0 234 0 -0.57100296 0.08979461 0.13573597071280369 0 -235 0 +235 0 ? ? ? 0 236 1 1.52918732 0.9982108 0.0025835996006501579 1 238 1 1.96808636 0.9997055 0.00042494571667690465 1 243 0 -0.703202665 0.0541654155 0.080340199841784268 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 2.32438421 0.999932 9.8119438852589428E-05 1 287 0 -1.079601 0.0120263351 0.017455508586451673 0 289 1 1.42798841 0.99728936 0.0039159372565436843 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 1.171589 0.9922556 0.011216254621805593 1 298 0 -0.8448025 0.0309909787 0.045417997957015462 0 302 1 2.26218414 0.999912143 0.00012675678201834402 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -1.13000059 0.00979631 0.014202769680513335 0 310 0 -1.11300075 0.0104985284 0.015226241971828952 0 313 0 -1.19720018 0.007447739 0.010785028389452023 0 -315 0 +315 0 ? ? ? 0 318 0 -1.07840109 0.0120851332 0.017541371482030246 0 320 1 0.77709043 0.961948335 0.055968683838555341 1 322 0 -1.06320107 0.01285497 0.018666035936317264 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -1.16360033 0.008542378 0.012376987044267063 0 408 0 -0.7606021 0.0432654768 0.063809436873055653 0 410 0 -1.16360033 0.008542378 0.012376987044267063 0 -411 0 +411 0 ? ? ? 
0 412 1 1.35098827 0.9962829 0.0053726686646499901 1 417 0 -1.16360033 0.008542378 0.012376987044267063 0 420 0 -0.6557027 0.0650944263 0.097107435980156739 0 diff --git a/test/BaselineOutput/SingleDebug/SGD/BinarySGD-Hinge-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleDebug/SGD/BinarySGD-Hinge-TrainTest-breast-cancer.txt index 9dec61f554..8153302a29 100644 --- a/test/BaselineOutput/SingleDebug/SGD/BinarySGD-Hinge-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/SGD/BinarySGD-Hinge-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 1.222367 0.9828292 0.024987354577091989 1 21 1 1.49796534 0.9935609 0.0093196803205346252 1 22 0 -1.19740391 0.009379565 0.013595712679399167 0 -23 1 +23 1 ? ? ? 0 24 0 -1.23810339 0.008112156 0.01175109505221196 0 25 1 0.3802781 0.7344174 0.4453278964846259 1 26 0 -1.14100432 0.01146578 0.016637187062001499 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -0.79460907 0.0387752019 0.057054226638771945 0 38 1 1.34886718 0.989039242 0.015900330726329494 1 39 1 0.6641748 0.8848001 0.17657658460400094 1 -40 0 +40 0 ? ? ? 0 41 1 0.223279715 0.611168444 0.7103580388075188 1 42 1 2.04505968 0.999095738 0.0013051645925104319 1 43 1 0.169680834 0.56447953 0.82500683046719547 1 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -1.15510464 0.0109047387 0.015818618982627604 0 137 0 -1.24130344 0.008020028 0.011617101215948859 0 138 0 -1.0749054 0.0144998142 0.021071951694510416 0 -139 0 +139 0 ? ? ? 0 140 0 -1.24130344 0.008020028 0.011617101215948859 0 141 0 -1.28360271 0.00689550675 0.0099825705316781733 0 142 1 0.643275 0.8769101 0.18949916409473391 1 143 0 -0.8591082 0.0309929028 0.04542086264678518 0 144 0 -1.23970342 0.008065961 0.011683906942824285 0 -145 0 +145 0 ? ? ? 
0 146 1 0.4436772 0.7764816 0.36497630408526505 1 147 0 -1.13820457 0.01158053 0.016804665017851262 0 148 0 -0.320714116 0.181648508 0.2892074628941344 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 0.9467714 0.9550218 0.066394431489110783 1 156 0 -1.11830425 0.0124295028 0.018044356997620709 0 157 0 -1.19580388 0.009433212 0.013673843835136703 0 -158 0 +158 0 ? ? ? 0 159 1 2.5887537 0.9998721 0.0001845491876153846 1 160 1 1.92586017 0.9986121 0.0020036987954509607 1 161 0 -1.04420578 0.01616599 0.023513167231551696 0 162 0 -1.15350461 0.0109670116 0.015909453114236246 0 163 0 -1.00530624 0.0185497329 0.027012930786301039 0 -164 0 +164 0 ? ? ? 0 165 0 -0.9275073 0.0243961066 0.035632579294711073 0 166 1 2.06175947 0.9991484 0.0012290814661556479 1 167 1 1.15397048 0.9781426 0.031883260676163518 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.0278829336 0.437610149 0.83035753638984611 1 233 1 1.11616874 0.975037336 0.036470630963644925 1 234 0 -0.544210553 0.09034561 0.13660958234622511 0 -235 0 +235 0 ? ? ? 0 236 1 2.15735817 0.999396145 0.00087144125636392123 1 237 1 1.37376881 0.9899692 0.014544463077068375 1 238 1 2.659554 0.9999008 0.00014309666195095306 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 2.70435286 0.9999156 0.00012176885553963592 1 247 1 0.616675138 0.8662006 0.207226880023576 1 248 0 -0.7762085 0.04131975 0.060878383940094143 0 -249 0 +249 0 ? ? ? 0 250 0 -1.16220379 0.0106326314 0.015421777653289784 0 251 1 1.43526506 0.9919444 0.011668877043853167 1 252 0 0.8921714 0.9457866 4.2052065773298537 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 0.6439743 0.8771814 0.18905285611569214 1 273 1 -0.149016142 0.29164663 1.7777066932129042 0 274 0 -1.08650517 0.0139153069 0.020216532376881539 0 -275 0 +275 0 ? ? ? 
0 276 0 -1.19740391 0.009379565 0.013595712679399167 0 277 0 -1.28200269 0.00693504466 0.010040008870103924 0 278 0 -1.23810339 0.008112156 0.01175109505221196 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 1.98086047 0.998861 0.0016441447437435472 1 290 0 -1.32590222 0.005927712 0.0085773281383116576 0 291 0 -1.23810339 0.008112156 0.01175109505221196 0 -292 1 +292 1 ? ? ? 0 293 1 1.05756974 0.9693573 0.044899543967322132 1 -294 0 +294 0 ? ? ? 0 295 1 1.67846322 0.9966264 0.0048753374261991068 1 296 0 0.418977976 0.760678768 2.062979700875831 1 -297 0 +297 0 ? ? ? 0 298 0 -0.865008354 0.0303616133 0.044481280497859745 0 299 1 1.15566754 0.9782728 0.031691271742118471 1 300 1 1.43996739 0.9920784 0.011473925159666753 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.7111747 0.9009488 0.15048293737780147 1 313 0 -1.32590222 0.005927712 0.0085773281383116576 0 314 0 -1.3091023 0.006294775 0.0091101437491470354 0 -315 0 +315 0 ? ? ? 0 316 1 0.9520712 0.955833852 0.065168231187009498 1 317 1 2.1167593 0.999301255 0.0010084286664473847 1 318 0 -1.15680456 0.0108389612 0.015722679136914018 0 319 0 0.43477726 0.7708744 2.1257893169799549 1 320 1 1.20646763 0.9818366 0.026445122755562926 1 -321 0 +321 0 ? ? ? 0 322 0 -1.15350461 0.0109670116 0.015909453114236246 0 323 1 1.1870687 0.980549 0.028338400890888689 1 324 0 -1.23810339 0.008112156 0.01175109505221196 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -0.775208354 0.0414625444 0.061093287384022553 0 409 0 -1.11720467 0.0124781644 0.018115446186086449 0 410 0 -1.28200269 0.00693504466 0.010040008870103924 0 -411 0 +411 0 ? ? ? 
0 412 1 1.89956212 0.9984746 0.0022023701778286324 1 413 0 -0.9887064 0.01966903 0.028659194266170487 0 414 1 1.3937664 0.9906591 0.013539378350893604 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -1.266803 0.00732205063 0.010602349145536318 0 615 0 -0.994706035 0.0192570463 0.028053029788189736 0 616 0 -1.19740391 0.009379565 0.013595712679399167 0 -617 0 +617 0 ? ? ? 0 618 0 -1.15510464 0.0109047387 0.015818618982627604 0 619 0 -1.11280513 0.0126747517 0.018402674010350579 0 620 0 -1.19740391 0.009379565 0.013595712679399167 0 diff --git a/test/BaselineOutput/SingleDebug/SGD/BinarySGD-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleDebug/SGD/BinarySGD-TrainTest-breast-cancer.txt index 29a93c0326..da39a0132a 100644 --- a/test/BaselineOutput/SingleDebug/SGD/BinarySGD-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/SGD/BinarySGD-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 0.822916269 0.694855034 0.52521607106152413 1 21 1 1.02655149 0.736246765 0.441738705665011 1 22 0 -1.01600647 0.265806019 0.44576680836978433 0 -23 1 +23 1 ? ? ? 0 24 0 -1.03864312 0.261411875 0.43715802871945769 0 25 1 0.184996247 0.5461176 0.87271643290195555 1 26 0 -0.9708479 0.274711519 0.46337315957197994 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -0.7265403 0.325954378 0.56908185228017971 0 38 1 0.933466434 0.717778 0.47839033548395937 1 39 1 0.4073757 0.600458443 0.7358636922780708 1 -40 0 +40 0 ? ? ? 0 41 1 0.04465425 0.5111617 0.96814839425710286 1 42 1 1.43612671 0.8078541 0.30783330336923359 1 43 1 0.0479370356 0.511981964 0.96583510643055481 1 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -0.9885073 0.271207035 0.45641906112335939 0 137 0 -1.04836845 0.259538531 0.4335034307664255 0 138 0 -0.9260764 0.283721417 0.48140728969854896 0 -139 0 +139 0 ? ? ? 
0 140 0 -1.04836845 0.259538531 0.4335034307664255 0 141 0 -1.07586777 0.2542888 0.42331107075059288 0 142 1 0.373193622 0.592230439 0.75576945149404562 1 143 0 -0.7654944 0.317454576 0.55100303407297502 0 144 0 -1.04350591 0.2604741 0.43532739229387868 0 -145 0 +145 0 ? ? ? 0 146 1 0.2147963 0.553493559 0.85336156547352904 1 147 0 -0.9662212 0.275634348 0.46520995753133637 0 148 0 -0.318193555 0.421116054 0.78865394770237895 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 0.600466 0.6457629 0.63092349091318589 1 156 0 -0.9430999 0.2802746 0.47448152072334643 0 157 0 -1.01114392 0.266756058 0.44763484776839352 0 -158 0 +158 0 ? ? ? 0 159 1 1.87013483 0.866473854 0.20677187910596315 1 160 1 1.34225249 0.7928601 0.33486178660539723 1 161 0 -0.901281357 0.288787246 0.49164689849299364 0 162 0 -0.9836446 0.2721692 0.45832499621527728 0 163 0 -0.855002642 0.2983845 0.51124744948014056 0 -164 0 +164 0 ? ? ? 0 165 0 -0.8168132 0.306440562 0.5279085681868505 0 166 1 1.49839211 0.817334533 0.29100140459599977 1 167 1 0.8153703 0.6932527 0.5285468008024331 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 -0.08038044 0.4799157 0.9431826305616432 0 233 1 0.6980891 0.667763948 0.58258988829378422 1 234 0 -0.49562943 0.378568321 0.68633230772699549 0 -235 0 +235 0 ? ? ? 0 236 1 1.52862382 0.8218049 0.28313219549408758 1 237 1 0.9843018 0.727960944 0.45806704440223556 1 238 1 1.96605682 0.877186954 0.18904373922390869 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 1.97140193 0.8777616 0.18809893386755816 1 247 1 0.323040724 0.580065131 0.78571319608569612 1 248 0 -0.6818254 0.335854024 0.59042772056631165 0 -249 0 +249 0 ? ? ? 
0 250 0 -0.9754619 0.273793161 0.46154757804170599 0 251 1 0.9712138 0.725361347 0.46322822535916075 1 252 0 0.5616691 0.6368387 1.4613175226151991 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 0.354397058 0.587683439 0.76688885070680657 1 273 1 -0.237432361 0.4409192 1.1814138244518058 0 274 0 -0.9287806 0.28317216 0.48030142611403009 0 -275 0 +275 0 ? ? ? 0 276 0 -1.01600647 0.265806019 0.44576680836978433 0 277 0 -1.07100511 0.255211979 0.42509822607281988 0 278 0 -1.03864312 0.261411875 0.43715802871945769 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 1.4364171 0.8078992 0.30775283385408775 1 290 0 -1.10336709 0.249109536 0.4133256254437066 0 291 0 -1.03864312 0.261411875 0.43715802871945769 0 -292 1 +292 1 ? ? ? 0 293 1 0.6671076 0.6608552 0.59759395409472371 1 -294 0 +294 0 ? ? ? 0 295 1 1.17509961 0.764065564 0.38823165513968877 1 296 0 0.224430084 0.5558732 1.1709565135958244 1 -297 0 +297 0 ? ? ? 0 298 0 -0.7844506 0.313361466 0.54237727145415437 0 299 1 0.748945951 0.678949 0.55862488926125509 1 300 1 1.01898217 0.7347743 0.44462694496525584 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.4770298 0.617046237 0.69654949630747753 1 313 0 -1.10336709 0.249109536 0.4133256254437066 0 314 0 -1.09132075 0.251369655 0.41767456748452503 0 -315 0 +315 0 ? ? ? 0 316 1 0.5936177 0.6441948 0.63443112049115613 1 317 1 1.53676748 0.8229943 0.28104567061447505 1 318 0 -0.977696 0.2733492 0.46066585861057346 0 319 0 0.228058934 0.5567689 1.1738689621124481 1 320 1 0.7848048 0.6867147 0.54221722972478248 1 -321 0 +321 0 ? ? ? 
0 322 0 -0.9836446 0.2721692 0.45832499621527728 0 323 1 0.799038649 0.6897688 0.53581523979313106 1 324 0 -1.03864312 0.261411875 0.43715802871945769 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -0.6859037 0.334944934 0.58845429463916576 0 409 0 -0.9535757 0.2781663 0.47026158367983178 0 410 0 -1.07100511 0.255211979 0.42509822607281988 0 -411 0 +411 0 ? ? ? 0 412 1 1.353524 0.794705153 0.331508396641091 1 413 0 -0.8662151 0.296042472 0.50643970604072797 0 414 1 0.9272311 0.7165132 0.48093477882048502 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -1.06382155 0.256579816 0.42775023737254403 0 615 0 -0.863645554 0.296578258 0.50753816766843873 0 616 0 -1.01600647 0.265806019 0.44576680836978433 0 -617 0 +617 0 ? ? ? 0 618 0 -0.9885073 0.271207035 0.45641906112335939 0 619 0 -0.961007953 0.276676446 0.46728696355201749 0 620 0 -1.01600647 0.265806019 0.44576680836978433 0 diff --git a/test/BaselineOutput/SingleDebug/SavePipe/TestParquetPrimitiveDataTypes-Data.txt b/test/BaselineOutput/SingleDebug/SavePipe/TestParquetPrimitiveDataTypes-Data.txt index af1e19e1cc..85a3d35b4b 100644 --- a/test/BaselineOutput/SingleDebug/SavePipe/TestParquetPrimitiveDataTypes-Data.txt +++ b/test/BaselineOutput/SingleDebug/SavePipe/TestParquetPrimitiveDataTypes-Data.txt @@ -11,5 +11,5 @@ #@ col=string:TX:7 #@ } sbyte short int long bool DateTimeOffset Interval string - 1 "2018-09-01T19:53:18.2910000+00:00" "31.00:00:00.0010000" "" +-128 -32768 -2147483648 -9223372036854775808 1 "2018-09-01T19:53:18.2910000+00:00" "31.00:00:00.0010000" "" 127 32767 2147483647 9223372036854775807 0 "2018-09-01T19:53:18.3110000+00:00" "31.00:00:00.0010000" """""" diff --git a/test/BaselineOutput/SingleDebug/SymSGD/SymSGD-CV-breast-cancer.txt b/test/BaselineOutput/SingleDebug/SymSGD/SymSGD-CV-breast-cancer.txt index d0e7499c6d..cc1083e607 100644 --- a/test/BaselineOutput/SingleDebug/SymSGD/SymSGD-CV-breast-cancer.txt +++ 
b/test/BaselineOutput/SingleDebug/SymSGD/SymSGD-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 454.251282 1 0 1 35 0 -320.737427 0 0 0 37 0 -78.9787 5.011722E-35 0 0 -40 0 +40 0 ? ? ? 0 41 1 199.0091 1 0 1 44 1 656.8247 1 0 1 45 0 -322.804565 0 0 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -289.415222 0 0 0 141 0 -344.61084 0 0 0 144 0 -320.737427 0 0 0 -145 0 +145 0 ? ? ? 0 147 0 -309.023651 0 0 0 150 0 -272.79837 0 0 0 151 1 249.55658 1 0 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -349.126221 0 0 0 156 0 -227.212433 0 0 0 161 0 -274.6302 0 0 0 -164 0 +164 0 ? ? ? 0 167 1 283.316284 1 0 1 169 0 -331.047241 0 0 0 171 0 -301.379425 0 0 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 1044.54126 1 0 1 247 1 409.237671 1 0 1 248 0 -221.818024 0 0 0 -249 0 +249 0 ? ? ? 0 250 0 -251.085815 0 0 0 252 0 307.68988 1 Infinity 1 254 1 728.536743 1 0 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -301.379425 0 0 0 271 0 -283.3178 0 0 0 272 1 408.017578 1 0 1 -275 0 +275 0 ? ? ? 0 276 0 -316.222015 0 0 0 277 0 -325.252838 0 0 0 278 0 -301.379425 0 0 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -301.379425 0 0 0 293 1 386.6949 1 0 1 296 0 139.220642 1 Infinity 1 -297 0 +297 0 ? ? ? 0 299 1 227.814941 1 0 1 300 1 407.6792 1 0 1 301 0 -301.379425 0 0 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 466.170166 1 0 1 317 1 736.0132 1 0 1 319 0 161.598083 1 Infinity 1 -321 0 +321 0 ? ? ? 0 323 1 388.03302 1 0 1 327 0 -325.252838 0 0 0 328 1 584.984 1 0 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 1115.01685 1 0 1 613 0 -169.23941 0 0 0 614 0 -292.156342 0 0 0 -617 0 +617 0 ? ? ? 
0 618 0 -311.7066 0 0 0 619 0 -307.1912 0 0 0 621 0 -15.8763733 1.27344038E-07 1.8371862313930792E-07 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -643.057739 0 0 0 19 0 -668.631836 0 0 0 22 0 -540.900146 0 0 0 -23 1 +23 1 ? ? ? 0 24 0 -604.696655 0 0 0 26 0 -270.657074 0 0 0 27 0 -566.4742 0 0 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -670.7141 0 0 0 135 0 -421.652374 0 0 0 136 0 -553.6872 0 0 0 -139 0 +139 0 ? ? ? 0 140 0 -451.529541 0 0 0 142 1 488.315338 1 0 1 143 0 -142.331116 0 0 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -322.504425 0 0 0 155 1 1089.60254 1 0 1 157 0 -617.483643 0 0 0 -158 0 +158 0 ? ? ? 0 159 1 1923.50525 1 0 1 160 1 1494.5094 1 0 1 162 0 -630.2707 0 0 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 1347.16248 1 0 1 232 0 355.92923 1 Infinity 1 234 0 50.92752 1 Infinity 1 -235 0 +235 0 ? ? ? 0 236 1 1204.77161 1 0 1 238 1 2421.62354 1 0 1 243 0 -499.813416 0 0 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 1933.38391 1 0 1 287 0 -670.7141 0 0 0 289 1 1586.01746 1 0 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 906.5393 1 0 1 298 0 -764.4775 0 0 0 302 1 1682.504 1 0 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -604.696655 0 0 0 310 0 -657.927 0 0 0 313 0 -425.9555 0 0 0 -315 0 +315 0 ? ? ? 0 318 0 -994.1385 0 0 0 320 1 276.6519 1 0 1 322 0 -630.2707 0 0 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -515.32605 0 0 0 408 0 -106.253662 0 0 0 410 0 -515.32605 0 0 0 -411 0 +411 0 ? ? ? 
0 412 1 1142.7179 1 0 1 417 0 -515.32605 0 0 0 420 0 -151.036346 0 0 0 diff --git a/test/BaselineOutput/SingleDebug/SymSGD/SymSGD-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleDebug/SymSGD/SymSGD-TrainTest-breast-cancer.txt index 4a57fddad1..f3130d53ec 100644 --- a/test/BaselineOutput/SingleDebug/SymSGD/SymSGD-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleDebug/SymSGD/SymSGD-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 10.1497192 0.9999609 5.6411412351548271E-05 1 21 1 -61.521698 1.91190378E-27 88.757048641689195 0 22 0 -407.94165 0 0 0 -23 1 +23 1 ? ? ? 0 24 0 -413.829132 0 0 0 25 1 -111.690765 0 Infinity 0 26 0 -333.49762 0 0 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -367.9438 0 0 0 38 1 125.049805 1 0 1 39 1 -120.420319 0 Infinity 0 -40 0 +40 0 ? ? ? 0 41 1 -214.114883 0 Infinity 0 42 1 86.67383 1 0 1 43 1 -299.954132 0 Infinity 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -408.326935 0 0 0 137 0 -401.2836 0 0 0 138 0 -411.7511 0 0 0 -139 0 +139 0 ? ? ? 0 140 0 -401.2836 0 0 0 141 0 -400.898315 0 0 0 142 1 -108.134125 0 Infinity 0 143 0 -305.7808 0 0 0 144 0 -407.556366 0 0 0 -145 0 +145 0 ? ? ? 0 146 1 -205.1228 0 Infinity 0 147 0 -408.638123 0 0 0 148 0 -448.917847 0 0 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 39.9500122 1 0 1 156 0 -361.30127 0 0 0 157 0 -414.214417 0 0 0 -158 0 +158 0 ? ? ? 0 159 1 214.183228 1 0 1 160 1 120.047485 1 0 1 161 0 -401.219147 0 0 0 162 0 -414.5997 0 0 0 163 0 -282.1694 0 0 0 -164 0 +164 0 ? ? ? 0 165 0 -377.536072 0 0 0 166 1 123.761658 1 0 1 167 1 -9.15014648 0.000106192965 13.201024182527696 0 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 -256.504028 0 0 0 233 1 -84.9973755 1.21929513E-37 122.62529213957316 0 234 0 -275.7568 0 0 0 -235 0 +235 0 ? ? ? 
0 236 1 116.098267 1 0 1 237 1 26.0986938 1 0 1 238 1 370.027954 1 0 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 321.109131 1 0 1 247 1 -61.9207153 1.28284749E-27 89.332708892981643 0 248 0 -346.1557 0 0 0 -249 0 +249 0 ? ? ? 0 250 0 -354.643219 0 0 0 251 1 108.2807 1 0 1 252 0 -4.193939 0.0148625113 0.021603009492489122 0 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 -151.574524 0 Infinity 0 273 1 -303.688629 0 Infinity 0 274 0 -400.833862 0 0 0 -275 0 +275 0 ? ? ? 0 276 0 -407.94165 0 0 0 277 0 -407.171082 0 0 0 278 0 -413.829132 0 0 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 175.671082 1 0 1 290 0 -400.513 0 0 0 291 0 -413.829132 0 0 0 -292 1 +292 1 ? ? ? 0 293 1 51.4936523 1 0 1 -294 0 +294 0 ? ? ? 0 295 1 5.8543396 0.997140765 0.0041309123233919023 1 296 0 -173.148254 0 0 0 -297 0 +297 0 ? ? ? 0 298 0 -429.3664 0 0 0 299 1 -112.8385 0 Infinity 0 300 1 -114.377228 0 Infinity 0 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 -175.547363 0 Infinity 0 313 0 -400.513 0 0 0 314 0 -382.020966 0 0 0 -315 0 +315 0 ? ? ? 0 316 1 -56.5515747 2.753995E-25 81.586676422594735 0 317 1 168.155884 1 0 1 318 0 -489.2794 0 0 0 319 0 -232.038055 0 0 0 320 1 16.8273315 0.99999994 8.5991327994145617E-08 1 -321 0 +321 0 ? ? ? 0 322 0 -414.5997 0 0 0 323 1 72.79901 1 0 1 324 0 -413.829132 0 0 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -278.598877 0 0 0 409 0 -411.365784 0 0 0 410 0 -407.171082 0 0 0 -411 0 +411 0 ? ? ? 0 412 1 21.296814 1 0 1 413 0 -418.794434 0 0 0 414 1 17.1500549 0.99999994 8.5991327994145617E-08 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -382.40625 0 0 0 615 0 -415.175232 0 0 0 616 0 -407.94165 0 0 0 -617 0 +617 0 ? ? ? 
0 618 0 -408.326935 0 0 0 619 0 -408.712219 0 0 0 620 0 -407.94165 0 0 0 diff --git a/test/BaselineOutput/SingleDebug/Transform/Concat/Concat1.tsv b/test/BaselineOutput/SingleDebug/Transform/Concat/Concat1.tsv index 36b453f708..c548a80a00 100644 --- a/test/BaselineOutput/SingleDebug/Transform/Concat/Concat1.tsv +++ b/test/BaselineOutput/SingleDebug/Transform/Concat/Concat1.tsv @@ -7,13 +7,13 @@ #@ col=f4:R4:8-** #@ } float1 float1 float1 float4.age float4.fnlwgt float4.education-num float4.capital-gain float1 -25 25 25 25 226802 7 0 25 25 226802 7 0 0 40 0 25 -38 38 38 38 89814 9 0 38 38 89814 9 0 0 50 0 38 -28 28 28 28 336951 12 0 28 28 336951 12 0 0 40 1 28 -44 44 44 44 160323 10 7688 44 44 160323 10 7688 0 40 1 44 -18 18 18 18 103497 10 0 18 18 103497 10 0 0 30 0 18 -34 34 34 34 198693 6 0 34 34 198693 6 0 0 30 0 34 -29 29 29 29 227026 9 0 29 29 227026 9 0 0 40 0 29 -63 63 63 63 104626 15 3103 63 63 104626 15 3103 0 32 1 63 -24 24 24 24 369667 10 0 24 24 369667 10 0 0 40 0 24 -55 55 55 55 104996 4 0 55 55 104996 4 0 0 10 0 55 +25 25 25 25 226802 7 0 25 25 226802 7 0 0 40 ? 0 25 +38 38 38 38 89814 9 0 38 38 89814 9 0 0 50 ? 0 38 +28 28 28 28 336951 12 0 28 28 336951 12 0 0 40 ? 1 28 +44 44 44 44 160323 10 7688 44 44 160323 10 7688 0 40 ? 1 44 +18 18 18 18 103497 10 0 18 18 103497 10 0 0 30 ? 0 18 +34 34 34 34 198693 6 0 34 34 198693 6 0 0 30 ? 0 34 +29 29 29 29 227026 9 0 29 29 227026 9 0 0 40 ? 0 29 +63 63 63 63 104626 15 3103 63 63 104626 15 3103 0 32 ? 1 63 +24 24 24 24 369667 10 0 24 24 369667 10 0 0 40 ? 0 24 +55 55 55 55 104996 4 0 55 55 104996 4 0 0 10 ? 
0 55 diff --git a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.PAVcalibration.txt b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.PAVcalibration.txt index eb8fd43aae..730e9c8fa9 100644 --- a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.PAVcalibration.txt +++ b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.PAVcalibration.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 3.64133739 1 0 1 35 0 -2.796535 1E-15 1.4415419267167138E-15 0 37 0 -1.65404248 1E-15 1.4415419267167138E-15 0 -40 0 +40 0 ? ? ? 0 41 1 1.04337406 0.8947368 0.16046469748481262 1 44 1 4.33966541 1 0 1 45 0 -2.89273548 1E-15 1.4415419267167138E-15 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -2.35179329 1E-15 1.4415419267167138E-15 0 141 0 -2.904073 1E-15 1.4415419267167138E-15 0 144 0 -2.796535 1E-15 1.4415419267167138E-15 0 -145 0 +145 0 ? ? ? 0 147 0 -2.463921 1E-15 1.4415419267167138E-15 0 150 0 -2.8614285 1E-15 1.4415419267167138E-15 0 151 1 3.17632246 1 0 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -3.01920986 1E-15 1.4415419267167138E-15 0 156 0 -2.49565363 1E-15 1.4415419267167138E-15 0 161 0 -2.30924 1E-15 1.4415419267167138E-15 0 -164 0 +164 0 ? ? ? 0 167 1 2.38255262 0.8947368 0.16046469748481262 1 169 0 -3.03097248 1E-15 1.4415419267167138E-15 0 171 0 -2.804134 1E-15 1.4415419267167138E-15 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 7.227234 1 0 1 247 1 2.672493 0.8947368 0.16046469748481262 1 248 0 -1.711307 1E-15 1.4415419267167138E-15 0 -249 0 +249 0 ? ? ? 
0 250 0 -2.60319138 1E-15 1.4415419267167138E-15 0 252 0 2.992918 0.8947368 3.2479272984652883 1 254 1 5.35164165 1 0 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -2.804134 1E-15 1.4415419267167138E-15 0 271 0 -2.34358668 1E-15 1.4415419267167138E-15 0 272 1 2.73419476 0.8947368 0.16046469748481262 1 -275 0 +275 0 ? ? ? 0 276 0 -2.68139815 1E-15 1.4415419267167138E-15 0 277 0 -2.91167164 1E-15 1.4415419267167138E-15 0 278 0 -2.804134 1E-15 1.4415419267167138E-15 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -2.804134 1E-15 1.4415419267167138E-15 0 293 1 3.413344 1 0 1 296 0 1.47483253 0.8947368 3.2479272984652883 1 -297 0 +297 0 ? ? ? 0 299 1 3.53376913 1 0 1 300 1 3.78027344 1 0 1 301 0 -2.804134 1E-15 1.4415419267167138E-15 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 3.45851088 1 0 1 317 1 5.48386 1 0 1 319 0 1.44604874 0.8947368 3.2479272984652883 1 -321 0 +321 0 ? ? ? 0 323 1 3.36483288 1 0 1 327 0 -2.91167164 1E-15 1.4415419267167138E-15 0 328 1 3.03614 0.8947368 0.16046469748481262 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 8.167599 1 0 1 613 0 -2.39343452 1E-15 1.4415419267167138E-15 0 614 0 -2.85382986 1E-15 1.4415419267167138E-15 0 -617 0 +617 0 ? ? ? 0 618 0 -2.56626129 1E-15 1.4415419267167138E-15 0 619 0 -2.45112467 1E-15 1.4415419267167138E-15 0 621 0 -0.5349846 0.2 0.32192810026182023 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -2.27471757 1E-15 1.4415419267167138E-15 0 19 0 -2.00358748 1E-15 1.4415419267167138E-15 0 22 0 -2.5426836 1E-15 1.4415419267167138E-15 0 -23 1 +23 1 ? ? ? 
0 24 0 -2.68141246 1E-15 1.4415419267167138E-15 0 26 0 -2.33200574 1E-15 1.4415419267167138E-15 0 27 0 -2.27155375 1E-15 1.4415419267167138E-15 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -2.546271 1E-15 1.4415419267167138E-15 0 135 0 -1.52827668 1E-15 1.4415419267167138E-15 0 136 0 -2.40711856 1E-15 1.4415419267167138E-15 0 -139 0 +139 0 ? ? ? 0 140 0 -2.67508459 1E-15 1.4415419267167138E-15 0 142 1 2.1587286 0.9117647 0.13326656969825684 1 143 0 -1.54474568 1E-15 1.4415419267167138E-15 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -1.83878517 1E-15 1.4415419267167138E-15 0 155 1 3.237853 0.9117647 0.13326656969825684 1 157 0 -2.54584742 1E-15 1.4415419267167138E-15 0 -158 0 +158 0 ? ? ? 0 159 1 6.77223158 1 0 1 160 1 5.42115831 1 0 1 162 0 -2.41028237 1E-15 1.4415419267167138E-15 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.187345 1 0 1 232 0 0.397937775 0.714285731 1.8073550080489322 1 234 0 -1.183644 1E-15 1.4415419267167138E-15 0 -235 0 +235 0 ? ? ? 0 236 1 5.6324296 1 0 1 238 1 6.77774 1 0 1 243 0 -1.01514125 1E-15 1.4415419267167138E-15 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 7.8132205 1 0 1 287 0 -2.546271 1E-15 1.4415419267167138E-15 0 289 1 5.25516939 1 0 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 4.77293825 1 0 1 298 0 -1.80093193 1E-15 1.4415419267167138E-15 0 302 1 7.77011251 1 0 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -2.68141246 1E-15 1.4415419267167138E-15 0 310 0 -2.68183613 1E-15 1.4415419267167138E-15 0 313 0 -2.94621468 1E-15 1.4415419267167138E-15 0 -315 0 +315 0 ? ? ? 
0 318 0 -2.69217515 1E-15 1.4415419267167138E-15 0 320 1 3.800508 0.9444444 0.082462200658479604 1 322 0 -2.41028237 1E-15 1.4415419267167138E-15 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -2.81381369 1E-15 1.4415419267167138E-15 0 408 0 -1.46855927 1E-15 1.4415419267167138E-15 0 410 0 -2.81381369 1E-15 1.4415419267167138E-15 0 -411 0 +411 0 ? ? ? 0 412 1 4.83788157 1 0 1 417 0 -2.81381369 1E-15 1.4415419267167138E-15 0 420 0 -1.04175115 1E-15 1.4415419267167138E-15 0 diff --git a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.calibrateRandom.txt b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.calibrateRandom.txt index 3b51cd60d5..689d51fcc1 100644 --- a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.calibrateRandom.txt +++ b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.calibrateRandom.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 3.64133739 0.9844741 0.022574810341618422 1 35 0 -2.796535 0.0199539755 0.029078592776682857 0 37 0 -1.65404248 0.0782266259 0.11751599963260938 0 -40 0 +40 0 ? ? ? 0 41 1 1.04337406 0.7116856 0.49068805388019454 1 44 1 4.33966541 0.9934526 0.0094769477341525628 1 45 0 -2.89273548 0.0177341755 0.025814589979333783 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -2.35179329 0.03427408 0.050314293386144675 0 141 0 -2.904073 0.0174890943 0.025454673744601461 0 144 0 -2.796535 0.0199539755 0.029078592776682857 0 -145 0 +145 0 ? ? ? 
0 147 0 -2.463921 0.0299276374 0.04383572570114834 0 150 0 -2.8614285 0.01842858 0.026834852844454766 0 151 1 3.17632246 0.9725775 0.040114860620962978 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -3.01920986 0.0151813291 0.022069981158425081 0 156 0 -2.49565363 0.0287977811 0.042156377165560456 0 161 0 -2.30924 0.0360781476 0.053011906638395674 0 -164 0 +164 0 ? ? ? 0 167 1 2.38255262 0.9293544 0.10569919063924076 1 169 0 -3.03097248 0.0149631584 0.021750410734051751 0 171 0 -2.804134 0.0197691489 0.028806541052408416 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 7.227234 0.999821365 0.00025773902362411926 1 247 1 2.672493 0.9497448 0.074388155670230882 1 248 0 -1.711307 0.07322063 0.10970216917333013 0 -249 0 +249 0 ? ? ? 0 250 0 -2.60319138 0.0252686124 0.036923393165046718 0 252 0 2.992918 0.9657571 4.8680501214754637 1 254 1 5.35164165 0.998142242 0.0026826702788205732 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -2.804134 0.0197691489 0.028806541052408416 0 271 0 -2.34358668 0.0346150957 0.050823827322496622 0 272 1 2.73419476 0.9532994 0.068998701675288501 1 -275 0 +275 0 ? ? ? 0 276 0 -2.68139815 0.0229703225 0.033525709865030048 0 277 0 -2.91167164 0.0173267 0.025216237008576704 0 278 0 -2.804134 0.0197691489 0.028806541052408416 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -2.804134 0.0197691489 0.028806541052408416 0 293 1 3.413344 0.9794621 0.029938444898768346 1 296 0 1.47483253 0.8088676 2.3873555377194497 1 -297 0 +297 0 ? ? ? 0 299 1 3.53376913 0.982280254 0.025793396317687449 1 300 1 3.78027344 0.986915946 0.019000876915907086 1 301 0 -2.804134 0.0197691489 0.028806541052408416 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 3.45851088 0.980567157 0.028311653515975352 1 317 1 5.48386 0.9984247 0.0022744566762884174 1 319 0 1.44604874 0.803245664 2.3455326626007635 1 -321 0 +321 0 ? ? ? 
0 323 1 3.36483288 0.978206754 0.031788669528507826 1 327 0 -2.91167164 0.0173267 0.025216237008576704 0 328 1 3.03614 0.9674988 0.047668253892284254 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 8.167599 0.9999448 7.9630164919418245E-05 1 613 0 -2.39343452 0.03259308 0.047805235639227908 0 614 0 -2.85382986 0.0186011065 0.027088450049153875 0 -617 0 +617 0 ? ? ? 0 618 0 -2.56626129 0.02643034 0.038643886518904186 0 619 0 -2.45112467 0.0303953178 0.044531429350354604 0 621 0 -0.5349846 0.255690753 0.42602593625059765 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -2.27471757 0.0249303821 0.036422867055812318 0 19 0 -2.00358748 0.03410183 0.050056992617068342 0 22 0 -2.5426836 0.0182464458 0.026567179066779304 0 -23 1 +23 1 ? ? ? 0 24 0 -2.68141246 0.015512228 0.022554807909292037 0 26 0 -2.33200574 0.02332543 0.034050161954667077 0 27 0 -2.27155375 0.0250220858 0.036558556406276949 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -2.546271 0.0181701127 0.026455011357386121 0 135 0 -1.52827668 0.0585265532 0.087007689086403689 0 136 0 -2.40711856 0.0213731956 0.031169296396687675 0 -139 0 +139 0 ? ? ? 0 140 0 -2.67508459 0.015627671 0.022723991158114246 0 142 1 2.1587286 0.83348304 0.26277525125659912 1 143 0 -1.54474568 0.05745574 0.085367730161702821 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -1.83878517 0.0411879122 0.060679997683715341 0 155 1 3.237853 0.947596252 0.077655602601770859 1 157 0 -2.54584742 0.0181791112 0.026468233670711376 0 -158 0 +158 0 ? ? ? 
0 159 1 6.77223158 0.999177039 0.0011877710421138682 1 160 1 5.42115831 0.995904 0.0059213730758132374 1 162 0 -2.41028237 0.0212945715 0.031053393109467146 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.187345 0.9945968 0.0078163354020312284 1 232 0 0.397937775 0.381006956 0.69200489802945642 1 234 0 -1.183644 0.085664086 0.12920380666492423 0 -235 0 +235 0 ? ? ? 0 236 1 5.6324296 0.9968118 0.0046069378176225089 1 238 1 6.77774 0.9991824 0.0011800254692933552 1 243 0 -1.01514125 0.102734588 0.15639329612707187 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 7.8132205 0.999761462 0.00034417833577817593 1 287 0 -2.546271 0.0181701127 0.026455011357386121 0 289 1 5.25516939 0.9950138 0.0072115986401722143 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 4.77293825 0.9911819 0.012778237651616852 1 298 0 -1.80093193 0.0430044457 0.063415872213561694 0 302 1 7.77011251 0.999748945 0.00036224093577622284 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -2.68141246 0.015512228 0.022554807909292037 0 310 0 -2.68183613 0.0155045288 0.022543525273161929 0 313 0 -2.94621468 0.0113662509 0.016491938164477051 0 -315 0 +315 0 ? ? ? 0 318 0 -2.69217515 0.0153178023 0.022269919361612874 0 320 1 3.800508 0.9724724 0.040270746291192143 1 322 0 -2.41028237 0.0212945715 0.031053393109467146 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -2.81381369 0.0132805621 0.019288165348886492 0 408 0 -1.46855927 0.06256822 0.093214386421141548 0 410 0 -2.81381369 0.0132805621 0.019288165348886492 0 -411 0 +411 0 ? ? ? 
0 412 1 4.83788157 0.991832554 0.011831516022545778 1 417 0 -2.81381369 0.0132805621 0.019288165348886492 0 420 0 -1.04175115 0.09985152 0.15176509864238413 0 diff --git a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.nocalibration.txt b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.nocalibration.txt index 09a2fb9aa7..e51f951dee 100644 --- a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.nocalibration.txt +++ b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.nocalibration.txt @@ -14,7 +14,7 @@ Instance Label Score Assigned 32 1 3.64133739 1 35 0 -2.796535 0 37 0 -1.65404248 0 -40 0 +40 0 ? 0 41 1 1.04337406 1 44 1 4.33966541 1 45 0 -2.89273548 0 @@ -76,7 +76,7 @@ Instance Label Score Assigned 138 0 -2.35179329 0 141 0 -2.904073 0 144 0 -2.796535 0 -145 0 +145 0 ? 0 147 0 -2.463921 0 150 0 -2.8614285 0 151 1 3.17632246 1 @@ -84,7 +84,7 @@ Instance Label Score Assigned 154 0 -3.01920986 0 156 0 -2.49565363 0 161 0 -2.30924 0 -164 0 +164 0 ? 0 167 1 2.38255262 1 169 0 -3.03097248 0 171 0 -2.804134 0 @@ -130,7 +130,7 @@ Instance Label Score Assigned 246 1 7.227234 1 247 1 2.672493 1 248 0 -1.711307 0 -249 0 +249 0 ? 0 250 0 -2.60319138 0 252 0 2.992918 1 254 1 5.35164165 1 @@ -144,7 +144,7 @@ Instance Label Score Assigned 269 0 -2.804134 0 271 0 -2.34358668 0 272 1 2.73419476 1 -275 0 +275 0 ? 0 276 0 -2.68139815 0 277 0 -2.91167164 0 278 0 -2.804134 0 @@ -158,7 +158,7 @@ Instance Label Score Assigned 291 0 -2.804134 0 293 1 3.413344 1 296 0 1.47483253 1 -297 0 +297 0 ? 0 299 1 3.53376913 1 300 1 3.78027344 1 301 0 -2.804134 0 @@ -172,7 +172,7 @@ Instance Label Score Assigned 316 1 3.45851088 1 317 1 5.48386 1 319 0 1.44604874 1 -321 0 +321 0 ? 
0 323 1 3.36483288 1 327 0 -2.91167164 0 328 1 3.03614 1 @@ -318,7 +318,7 @@ Instance Label Score Assigned 612 1 8.167599 1 613 0 -2.39343452 0 614 0 -2.85382986 0 -617 0 +617 0 ? 0 618 0 -2.56626129 0 619 0 -2.45112467 0 621 0 -0.5349846 0 @@ -375,7 +375,7 @@ Instance Label Score Assigned 17 0 -2.27471757 0 19 0 -2.00358748 0 22 0 -2.5426836 0 -23 1 +23 1 ? 0 24 0 -2.68141246 0 26 0 -2.33200574 0 27 0 -2.27155375 0 @@ -425,7 +425,7 @@ Instance Label Score Assigned 134 0 -2.546271 0 135 0 -1.52827668 0 136 0 -2.40711856 0 -139 0 +139 0 ? 0 140 0 -2.67508459 0 142 1 2.1587286 1 143 0 -1.54474568 0 @@ -435,7 +435,7 @@ Instance Label Score Assigned 153 0 -1.83878517 0 155 1 3.237853 1 157 0 -2.54584742 0 -158 0 +158 0 ? 0 159 1 6.77223158 1 160 1 5.42115831 1 162 0 -2.41028237 0 @@ -474,7 +474,7 @@ Instance Label Score Assigned 231 1 5.187345 1 232 0 0.397937775 1 234 0 -1.183644 0 -235 0 +235 0 ? 0 236 1 5.6324296 1 238 1 6.77774 1 243 0 -1.01514125 0 @@ -496,8 +496,8 @@ Instance Label Score Assigned 286 1 7.8132205 1 287 0 -2.546271 0 289 1 5.25516939 1 -292 1 -294 0 +292 1 ? 0 +294 0 ? 0 295 1 4.77293825 1 298 0 -1.80093193 0 302 1 7.77011251 1 @@ -506,7 +506,7 @@ Instance Label Score Assigned 307 0 -2.68141246 0 310 0 -2.68183613 0 313 0 -2.94621468 0 -315 0 +315 0 ? 0 318 0 -2.69217515 0 320 1 3.800508 1 322 0 -2.41028237 0 @@ -551,7 +551,7 @@ Instance Label Score Assigned 407 0 -2.81381369 0 408 0 -1.46855927 0 410 0 -2.81381369 0 -411 0 +411 0 ? 
0 412 1 4.83788157 1 417 0 -2.81381369 0 420 0 -1.04175115 0 diff --git a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.txt b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.txt index 693440edc2..7664c7d409 100644 --- a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 2.35438 0.996385 0.0052248235129257635 1 35 0 -1.83556986 0.00724350661 0.010488202773633042 0 37 0 -0.8828192 0.07420456 0.11123463299619094 0 -40 0 +40 0 ? ? ? 0 41 1 0.6366718 0.7855496 0.34822575879653411 1 44 1 2.72072053 0.998558342 0.002081372862402793 1 45 0 -1.87994158 0.00648348359 0.0093841435784006184 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -1.51871562 0.0159320254 0.02317012154973026 0 141 0 -1.93865037 0.00559835136 0.0080994066187532153 0 144 0 -1.83556986 0.00724350661 0.010488202773633042 0 -145 0 +145 0 ? ? ? 0 147 0 -1.69274938 0.010342177 0.014998299390129487 0 150 0 -1.91297352 0.00596963847 0.0086381769438633477 0 151 1 1.55168462 0.973401248 0.038893469912665984 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -2.0423522 0.004318493 0.0062437599081359638 0 156 0 -1.73049688 0.009414184 0.013646131293487658 0 161 0 -1.46562588 0.0181668717 0.026450249055215744 0 -164 0 +164 0 ? ? ? 0 167 1 2.31870866 0.99604696 0.0057143326416850991 1 169 0 -2.041339 0.00432946626 0.0062596597963016712 0 171 0 -1.8361913 0.00723227439 0.010471879968765333 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 3.8998096 0.9999256 0.00010732116581602008 1 247 1 0.9928844 0.8997386 0.15242216136874395 1 248 0 -1.22778583 0.03256034 0.047756414470681303 0 -249 0 +249 0 ? ? ? 
0 250 0 -1.83357728 0.00727963867 0.010540711608751351 0 252 0 1.37199 0.958826959 4.6021561858199664 1 254 1 2.79375386 0.9988 0.0017323029622904203 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -1.8361913 0.00723227439 0.010471879968765333 0 271 0 -1.42138422 0.0202620383 0.02953215377914243 0 272 1 1.10222125 0.921965 0.11721610663727763 1 -275 0 +275 0 ? ? ? 0 276 0 -1.73186815 0.009382072 0.013599363941350218 0 277 0 -1.93927169 0.00558965746 0.0080867934194779039 0 278 0 -1.8361913 0.00723227439 0.010471879968765333 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -1.8361913 0.00723227439 0.010471879968765333 0 293 1 1.65592456 0.9794098 0.030015442677317709 1 296 0 0.553571463 0.748245 1.9899076684215853 1 -297 0 +297 0 ? ? ? 0 299 1 2.001486 0.991262555 0.012660861084033199 1 300 1 2.09417748 0.9930671 0.010036913891508983 1 301 0 -1.8361913 0.00723227439 0.010471879968765333 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 1.47151637 0.9676506 0.047441894201880302 1 317 1 3.06524873 0.999393463 0.00087531320929783803 1 319 0 0.8519552 0.8629285 2.8669995609516308 1 -321 0 +321 0 ? ? ? 0 323 1 1.75474 0.9838682 0.023463058122578043 1 327 0 -1.93927169 0.00558965746 0.0080867934194779039 0 328 1 1.43182349 0.964374959 0.052333903838185522 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 5.09431458 0.9999963 5.3314720279485219E-06 1 613 0 -1.63692176 0.0118829105 0.017246087060301379 0 614 0 -1.9123522 0.00597891957 0.0086516472088849234 0 -617 0 +617 0 ? ? ? 
0 618 0 -1.62816632 0.0121443039 0.017627783540922228 0 619 0 -1.52446461 0.0157068819 0.022840087163427197 0 621 0 -0.188778639 0.31474337 0.54528371301135603 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -1.57970572 0.0144622317 0.021016934775320784 0 19 0 -1.32506859 0.0253080986 0.036981837697027814 0 22 0 -1.76987159 0.009491567 0.01375883626140059 0 -23 1 +23 1 ? ? ? 0 24 0 -1.96166122 0.00619609 0.0089668769102733102 0 26 0 -1.67495286 0.0117150256 0.017000988488892345 0 27 0 -1.51523459 0.0166727714 0.024256502854015347 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -1.75784481 0.009748338 0.014132877086598956 0 135 0 -1.07198834 0.04377895 0.06458392745656516 0 136 0 -1.64255309 0.0125861792 0.018273256294688772 0 -139 0 +139 0 ? ? ? 0 140 0 -1.832719 0.008254912 0.01195874763978641 0 142 1 1.20702124 0.8832422 0.17911900782229345 1 143 0 -1.436229 0.0198381469 0.028908095216224419 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -1.37983692 0.0224505328 0.032758385480089913 0 155 1 1.24272728 0.8912444 0.16610696969487637 1 157 0 -1.83434272 0.008225175 0.011915489581813862 0 -158 0 +158 0 ? ? ? 0 159 1 4.09360933 0.999795 0.00029575448112624953 1 160 1 3.04705143 0.997864842 0.0030836744978663621 1 162 0 -1.70702422 0.0109114433 0.015828398337213719 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 2.80366325 0.996321738 0.0053163942348769758 1 232 0 0.248882532 0.46910888 0.91351208341864276 1 234 0 -0.8223932 0.0741594 0.11116426368346338 0 -235 0 +235 0 ? ? ? 
0 236 1 3.65436459 0.999451637 0.00079133718371403176 1 238 1 4.208801 0.99984163 0.00022849704563499302 1 243 0 -1.18483961 0.0343320966 0.050400968413634589 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 4.869809 0.999964 5.1939695512002597E-05 1 287 0 -1.75784481 0.009748338 0.014132877086598956 0 289 1 2.8387928 0.996599257 0.0049145964567919058 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 2.21859837 0.9864867 0.019628533461458434 1 298 0 -1.053101 0.0455854833 0.067312108231381534 0 302 1 4.847371 0.999962151 5.4605525036338512E-05 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -1.96166122 0.00619609 0.0089668769102733102 0 310 0 -1.88516331 0.007346285 0.010637570327263628 0 313 0 -2.087356 0.00468213623 0.0067707579292392971 0 -315 0 +315 0 ? ? ? 0 318 0 -1.9255811 0.006714394 0.00971948958962956 0 320 1 1.98424911 0.9773634 0.03303300364958086 1 322 0 -1.70702422 0.0109114433 0.015828398337213719 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -2.02450848 0.00538646942 0.0077920373965741406 0 408 0 -1.276682 0.0281251986 0.041157619505191562 0 410 0 -2.02450848 0.00538646942 0.0077920373965741406 0 -411 0 +411 0 ? ? ? 
0 412 1 2.820312 0.996455967 0.0051220399276606741 1 417 0 -2.02450848 0.00538646942 0.0077920373965741406 0 420 0 -1.02720916 0.0481775925 0.071235676455785449 0 diff --git a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.PAVcalibration.txt b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.PAVcalibration.txt index 0c2f376b8f..43672feda2 100644 --- a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.PAVcalibration.txt +++ b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.PAVcalibration.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 4.309107 0.9756098 0.035623875334191583 1 21 1 6.161626 1 0 1 22 0 -3.35177135 1E-15 1.4415419267167138E-15 0 -23 1 +23 1 ? ? ? 0 24 0 -3.57612 1E-15 1.4415419267167138E-15 0 25 1 1.68778992 0.8125 0.29956028185890782 1 26 0 -3.21128821 1E-15 1.4415419267167138E-15 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.20465565 0.0625 0.093109404391481479 0 38 1 4.3035 0.9756098 0.035623875334191583 1 39 1 2.444232 0.84 0.25153881203904033 1 -40 0 +40 0 ? ? ? 0 41 1 2.349327 0.84 0.25153881203904033 1 42 1 6.69547653 1 0 1 43 1 0.1725626 0.7777778 0.36257005481575838 1 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -3.11112714 1E-15 1.4415419267167138E-15 0 137 0 -3.60871148 1E-15 1.4415419267167138E-15 0 138 0 -2.89319158 1E-15 1.4415419267167138E-15 0 -139 0 +139 0 ? ? ? 0 140 0 -3.60871148 1E-15 1.4415419267167138E-15 0 141 0 -3.8493557 1E-15 1.4415419267167138E-15 0 142 1 2.848053 0.84 0.25153881203904033 1 143 0 -2.73937 1E-15 1.4415419267167138E-15 0 144 0 -3.59241557 1E-15 1.4415419267167138E-15 0 -145 0 +145 0 ? ? ? 
0 146 1 1.31008816 0.8125 0.29956028185890782 1 147 0 -3.43885779 1E-15 1.4415419267167138E-15 0 148 0 -0.152019978 0.4 0.73696560849809378 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.618639 0.84 0.25153881203904033 1 156 0 -3.41809654 1E-15 1.4415419267167138E-15 0 157 0 -3.33547568 1E-15 1.4415419267167138E-15 0 -158 0 +158 0 ? ? ? 0 159 1 9.536341 1 0 1 160 1 6.860572 1 0 1 161 0 -2.5829134 1E-15 1.4415419267167138E-15 0 162 0 -3.09483147 1E-15 1.4415419267167138E-15 0 163 0 -2.50633955 1E-15 1.4415419267167138E-15 0 -164 0 +164 0 ? ? ? 0 165 0 -2.245256 1E-15 1.4415419267167138E-15 0 166 1 6.456311 1 0 1 167 1 4.319108 0.9756098 0.035623875334191583 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.8359299 0.7777778 2.1699250874336404 1 233 1 3.89948845 0.9411765 0.087462835875881578 1 234 0 -1.06992626 0.0625 0.093109404391481479 0 -235 0 +235 0 ? ? ? 0 236 1 8.428113 1 0 1 237 1 5.33226967 1 0 1 238 1 9.183852 1 0 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 8.645902 1 0 1 247 1 2.597087 0.84 0.25153881203904033 1 248 0 -1.497818 0.0625 0.093109404391481479 0 -249 0 +249 0 ? ? ? 0 250 0 -3.67503667 1E-15 1.4415419267167138E-15 0 251 1 5.0781126 1 0 1 252 0 3.308917 0.84 2.643855953298599 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 2.38956165 0.84 0.25153881203904033 1 273 1 0.3306446 0.7777778 0.36257005481575838 1 274 0 -2.82355762 1E-15 1.4415419267167138E-15 0 -275 0 +275 0 ? ? ? 0 276 0 -3.35177135 1E-15 1.4415419267167138E-15 0 277 0 -3.83305979 1E-15 1.4415419267167138E-15 0 278 0 -3.57612 1E-15 1.4415419267167138E-15 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 6.52714539 1 0 1 290 0 -4.08999968 1E-15 1.4415419267167138E-15 0 291 0 -3.57612 1E-15 1.4415419267167138E-15 0 -292 1 +292 1 ? ? ? 0 293 1 3.63595963 0.9411765 0.087462835875881578 1 -294 0 +294 0 ? ? ? 
0 295 1 5.25647163 1 0 1 296 0 1.97336864 0.8125 2.4150374992788439 1 -297 0 +297 0 ? ? ? 0 298 0 -2.03882861 1E-15 1.4415419267167138E-15 0 299 1 4.99616432 1 0 1 300 1 5.63767242 1 0 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 2.14314938 0.84 0.25153881203904033 1 313 0 -4.08999968 1E-15 1.4415419267167138E-15 0 314 0 -3.88960457 1E-15 1.4415419267167138E-15 0 -315 0 +315 0 ? ? ? 0 316 1 3.149722 0.84 0.25153881203904033 1 317 1 7.02619457 1 0 1 318 0 -3.406486 1E-15 1.4415419267167138E-15 0 319 0 1.82393885 0.8125 2.4150374992788439 1 320 1 5.15885925 1 0 1 -321 0 +321 0 ? ? ? 0 322 0 -3.09483147 1E-15 1.4415419267167138E-15 0 323 1 3.22143555 0.84 0.25153881203904033 1 324 0 -3.57612 1E-15 1.4415419267167138E-15 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -2.29045367 1E-15 1.4415419267167138E-15 0 409 0 -3.13383579 1E-15 1.4415419267167138E-15 0 410 0 -3.83305979 1E-15 1.4415419267167138E-15 0 -411 0 +411 0 ? ? ? 0 412 1 6.52325153 1 0 1 413 0 -2.39560747 1E-15 1.4415419267167138E-15 0 414 1 5.304202 1 0 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -3.64896035 1E-15 1.4415419267167138E-15 0 615 0 -2.675256 1E-15 1.4415419267167138E-15 0 616 0 -3.35177135 1E-15 1.4415419267167138E-15 0 -617 0 +617 0 ? ? ? 
0 618 0 -3.11112714 1E-15 1.4415419267167138E-15 0 619 0 -2.870483 1E-15 1.4415419267167138E-15 0 620 0 -3.35177135 1E-15 1.4415419267167138E-15 0 diff --git a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.calibrateRandom.txt b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.calibrateRandom.txt index 69424d1f96..b1c0d65d4f 100644 --- a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.calibrateRandom.txt +++ b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.calibrateRandom.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 4.309107 0.982883155 0.024908174987257281 1 21 1 6.161626 0.997486651 0.0036305605660221312 1 22 0 -3.35177135 0.0190011337 0.027676625763894382 0 -23 1 +23 1 ? ? ? 0 24 0 -3.57612 0.0150948567 0.021943310344814629 0 25 1 1.68778992 0.7883411 0.34310809466132391 1 26 0 -3.21128821 0.02193545 0.031998411900987428 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.20465565 0.154014841 0.24129573984468006 0 38 1 4.3035 0.98278445 0.025053063812451211 1 39 1 2.444232 0.8913242 0.16597778265246793 1 -40 0 +40 0 ? ? ? 0 41 1 2.349327 0.881352544 0.18220887767190111 1 42 1 6.69547653 0.99855864 0.002080942285094773 1 43 1 0.1725626 0.43382585 1.2048120730945795 1 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -3.11112714 0.0242936034 0.035481008429150813 0 137 0 -3.60871148 0.0145974671 0.021214914966477344 0 138 0 -2.89319158 0.0303093083 0.044403459692558656 0 -139 0 +139 0 ? ? ? 0 140 0 -3.60871148 0.0145974671 0.021214914966477344 0 141 0 -3.8493557 0.01139268 0.016530506138550251 0 142 1 2.848053 0.9259271 0.11102947905485054 1 143 0 -2.73937 0.0353999846 0.051997261707361145 0 144 0 -3.59241557 0.0148441121 0.021576064827714625 0 -145 0 +145 0 ? ? ? 
0 146 1 1.31008816 0.7152061 0.48356908051152397 1 147 0 -3.43885779 0.01737916 0.025293256640113757 0 148 0 -0.152019978 0.353206038 0.62862188502856431 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.618639 0.9077395 0.139649725798073 1 156 0 -3.41809654 0.0177530367 0.025842292400220613 0 157 0 -3.33547568 0.01932072 0.028146699492868216 0 -158 0 +158 0 ? ? ? 0 159 1 9.536341 0.999925554 0.0001074071635410925 1 160 1 6.860572 0.998786449 0.0017518465781146784 1 161 0 -2.5829134 0.0414183028 0.061026700952795158 0 162 0 -3.09483147 0.0246999636 0.036081984337152172 0 163 0 -2.50633955 0.04470976 0.065988970965053517 0 -164 0 +164 0 ? ? ? 0 165 0 -2.245256 0.0579013154 0.086049905177797967 0 166 1 6.456311 0.998150766 0.0026703506849678977 1 167 1 4.319108 0.983057857 0.024651767929287045 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.8359299 0.6049175 1.3397742465159499 1 233 1 3.89948845 0.9739913 0.038019246409892023 1 234 0 -1.06992626 0.17323719 0.27445460033191865 0 -235 0 +235 0 ? ? ? 0 236 1 8.428113 0.99976337 0.0003414259594434771 1 237 1 5.33226967 0.994048536 0.0086117997569089607 1 238 1 9.183852 0.9998925 0.00015513669190214178 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 8.645902 0.9998115 0.00027201620468366962 1 247 1 2.597087 0.9058386 0.14267406349976108 1 248 0 -1.497818 0.118221387 0.1815116096306757 0 -249 0 +249 0 ? ? ? 0 250 0 -3.67503667 0.0136346063 0.019805910679898876 0 251 1 5.0781126 0.9922549 0.011217294571842519 1 252 0 3.308917 0.952875 4.4073641261174119 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 2.38956165 0.8856733 0.17515349285073656 1 273 1 0.3306446 0.474698573 1.0749163809587898 1 274 0 -2.82355762 0.0325194858 0.047695490325124906 0 -275 0 +275 0 ? ? ? 
0 276 0 -3.35177135 0.0190011337 0.027676625763894382 0 277 0 -3.83305979 0.0115858121 0.016812375308683684 0 278 0 -3.57612 0.0150948567 0.021943310344814629 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 6.52714539 0.9982823 0.0024802287545360943 1 290 0 -4.08999968 0.008885143 0.012875839136462838 0 291 0 -3.57612 0.0150948567 0.021943310344814629 0 -292 1 +292 1 ? ? ? 0 293 1 3.63595963 0.9660381 0.049847993866089353 1 -294 0 +294 0 ? ? ? 0 295 1 5.25647163 0.9935618 0.0093183820918222807 1 296 0 1.97336864 0.8338234 2.589210701355853 1 -297 0 +297 0 ? ? ? 0 298 0 -2.03882861 0.0708337 0.10599126176574966 0 299 1 4.99616432 0.9915693 0.012214518276517204 1 300 1 5.63767242 0.995665669 0.0062667080268442431 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 2.14314938 0.856945038 0.22272541817603111 1 313 0 -4.08999968 0.008885143 0.012875839136462838 0 314 0 -3.88960457 0.0109292 0.015854298528928425 0 -315 0 +315 0 ? ? ? 0 316 1 3.149722 0.9448283 0.081875960094708669 1 317 1 7.02619457 0.9989789 0.0014738699989607148 1 318 0 -3.406486 0.0179655552 0.026154467003641695 0 319 0 1.82393885 0.8110785 2.4041411141982105 1 320 1 5.15885925 0.992876351 0.01031403381058839 1 -321 0 +321 0 ? ? ? 0 322 0 -3.09483147 0.0246999636 0.036081984337152172 0 323 1 3.22143555 0.948601961 0.076125243844936963 1 324 0 -3.57612 0.0150948567 0.021943310344814629 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -2.29045367 0.05538148 0.082196274856991033 0 409 0 -3.13383579 0.0237381756 0.034659977772739645 0 410 0 -3.83305979 0.0115858121 0.016812375308683684 0 -411 0 +411 0 ? ? ? 
0 412 1 6.52325153 0.99827534 0.0024903070861691909 1 413 0 -2.39560747 0.0499131419 0.073868682437534181 0 414 1 5.304202 0.9938727 0.0088670155127870496 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -3.64896035 0.0140054561 0.020348431589167357 0 615 0 -2.675256 0.0377570055 0.055526832252899339 0 616 0 -3.35177135 0.0190011337 0.027676625763894382 0 -617 0 +617 0 ? ? ? 0 618 0 -3.11112714 0.0242936034 0.035481008429150813 0 619 0 -2.870483 0.0310136 0.045451678527033197 0 620 0 -3.35177135 0.0190011337 0.027676625763894382 0 diff --git a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.nocalibration.txt b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.nocalibration.txt index 0f08e7f566..55861f250c 100644 --- a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.nocalibration.txt +++ b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.nocalibration.txt @@ -22,7 +22,7 @@ Instance Label Score Assigned 20 1 4.309107 1 21 1 6.161626 1 22 0 -3.35177135 0 -23 1 +23 1 ? 0 24 0 -3.57612 0 25 1 1.68778992 1 26 0 -3.21128821 0 @@ -39,7 +39,7 @@ Instance Label Score Assigned 37 0 -1.20465565 0 38 1 4.3035 1 39 1 2.444232 1 -40 0 +40 0 ? 0 41 1 2.349327 1 42 1 6.69547653 1 43 1 0.1725626 1 @@ -138,13 +138,13 @@ Instance Label Score Assigned 136 0 -3.11112714 0 137 0 -3.60871148 0 138 0 -2.89319158 0 -139 0 +139 0 ? 0 140 0 -3.60871148 0 141 0 -3.8493557 0 142 1 2.848053 1 143 0 -2.73937 0 144 0 -3.59241557 0 -145 0 +145 0 ? 0 146 1 1.31008816 1 147 0 -3.43885779 0 148 0 -0.152019978 0 @@ -157,13 +157,13 @@ Instance Label Score Assigned 155 1 2.618639 1 156 0 -3.41809654 0 157 0 -3.33547568 0 -158 0 +158 0 ? 0 159 1 9.536341 1 160 1 6.860572 1 161 0 -2.5829134 0 162 0 -3.09483147 0 163 0 -2.50633955 0 -164 0 +164 0 ? 
0 165 0 -2.245256 0 166 1 6.456311 1 167 1 4.319108 1 @@ -234,7 +234,7 @@ Instance Label Score Assigned 232 0 0.8359299 1 233 1 3.89948845 1 234 0 -1.06992626 0 -235 0 +235 0 ? 0 236 1 8.428113 1 237 1 5.33226967 1 238 1 9.183852 1 @@ -248,7 +248,7 @@ Instance Label Score Assigned 246 1 8.645902 1 247 1 2.597087 1 248 0 -1.497818 0 -249 0 +249 0 ? 0 250 0 -3.67503667 0 251 1 5.0781126 1 252 0 3.308917 1 @@ -274,7 +274,7 @@ Instance Label Score Assigned 272 1 2.38956165 1 273 1 0.3306446 1 274 0 -2.82355762 0 -275 0 +275 0 ? 0 276 0 -3.35177135 0 277 0 -3.83305979 0 278 0 -3.57612 0 @@ -291,12 +291,12 @@ Instance Label Score Assigned 289 1 6.52714539 1 290 0 -4.08999968 0 291 0 -3.57612 0 -292 1 +292 1 ? 0 293 1 3.63595963 1 -294 0 +294 0 ? 0 295 1 5.25647163 1 296 0 1.97336864 1 -297 0 +297 0 ? 0 298 0 -2.03882861 0 299 1 4.99616432 1 300 1 5.63767242 1 @@ -314,13 +314,13 @@ Instance Label Score Assigned 312 1 2.14314938 1 313 0 -4.08999968 0 314 0 -3.88960457 0 -315 0 +315 0 ? 0 316 1 3.149722 1 317 1 7.02619457 1 318 0 -3.406486 0 319 0 1.82393885 1 320 1 5.15885925 1 -321 0 +321 0 ? 0 322 0 -3.09483147 0 323 1 3.22143555 1 324 0 -3.57612 0 @@ -410,7 +410,7 @@ Instance Label Score Assigned 408 0 -2.29045367 0 409 0 -3.13383579 0 410 0 -3.83305979 0 -411 0 +411 0 ? 0 412 1 6.52325153 1 413 0 -2.39560747 0 414 1 5.304202 1 @@ -616,7 +616,7 @@ Instance Label Score Assigned 614 0 -3.64896035 0 615 0 -2.675256 0 616 0 -3.35177135 0 -617 0 +617 0 ? 
0 618 0 -3.11112714 0 619 0 -2.870483 0 620 0 -3.35177135 0 diff --git a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.txt index f1da00a926..e07ceec647 100644 --- a/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/AveragedPerceptron/AveragedPerceptron-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 2.27614474 0.99046284 0.01382524609823586 1 21 1 2.71551442 0.9964734 0.0050968413754289565 1 22 0 -1.91610467 0.007339344 0.010627482294138626 0 -23 1 +23 1 ? ? ? 0 24 0 -2.12083673 0.00461633364 0.0066753814217522627 0 25 1 0.579404354 0.6851912 0.54542144241146051 1 26 0 -1.88108075 0.007944072 0.011506638331166432 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -0.9763708 0.0591678135 0.087990678318914578 0 38 1 1.97923112 0.9814142 0.027065950749429996 1 39 1 0.8320954 0.794680536 0.33155308613045453 1 -40 0 +40 0 ? ? ? 0 41 1 0.88030076 0.812019646 0.30041346219868831 1 42 1 3.29876113 0.9990636 0.0013515566126819322 1 43 1 0.21289444 0.485705882 1.0418451361573347 1 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -1.76776826 0.010259659 0.014878012048023576 0 137 0 -2.00804472 0.00596073642 0.0086252569347725905 0 138 0 -1.64871967 0.0134130223 0.019481850015569589 0 -139 0 +139 0 ? ? ? 0 140 0 -2.00804472 0.00596073642 0.0086252569347725905 0 141 0 -2.15638113 0.00425880356 0.0061572754121802442 0 142 1 1.42587113 0.93738085 0.093292772480968233 1 143 0 -1.57122457 0.0159614533 0.023213265078036949 0 144 0 -2.06444073 0.005245867 0.007588107121760313 0 -145 0 +145 0 ? ? ? 
0 146 1 0.5954063 0.693000734 0.52907121478369279 1 147 0 -1.97373641 0.00644216966 0.0093241524569279595 0 148 0 -0.5180371 0.15157719 0.23714468564136862 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 1.38289 0.931381464 0.10255592440494538 1 156 0 -2.00303721 0.00602870947 0.0087239127220832425 0 157 0 -1.97250068 0.00646021264 0.0093503519898618208 0 -158 0 +158 0 ? ? ? 0 159 1 4.52930737 0.9999432 8.1952060728897203E-05 1 160 1 3.334266 0.9991363 0.0012465526889135054 1 161 0 -1.5841018 0.015507183 0.022547414886151074 0 162 0 -1.82416427 0.009033906 0.01309239897866022 0 163 0 -1.37575865 0.0246935654 0.03607251996647886 0 -164 0 +164 0 ? ? ? 0 165 0 -1.40072858 0.0233600326 0.034101275741716536 0 166 1 3.14331079 0.9986662 0.0019255122730899305 1 167 1 2.61804938 0.995600462 0.0063611951559849299 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.262052536 0.5136946 1.0400654369740645 1 233 1 2.16407657 0.987723053 0.017821513393369265 1 234 0 -1.00943446 0.0551115535 0.081784080061402753 0 -235 0 +235 0 ? ? ? 0 236 1 4.29103756 0.999902248 0.00014103266690546063 1 237 1 2.42226958 0.9931447 0.0099241759578403005 1 238 1 4.649124 0.9999568 6.2345058017421014E-05 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 4.143937 0.9998633 0.00019719158494834585 1 247 1 1.19717526 0.898898065 0.15377057127992425 1 248 0 -1.217197 0.03506021 0.051489167959956395 0 -249 0 +249 0 ? ? ? 0 250 0 -2.09497738 0.004895074 0.0070794400974987601 0 251 1 2.700413 0.9963504 0.0052748803033137396 1 252 0 1.58717227 0.955785036 4.4993214783146023 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 0.980280161 0.8443483 0.2440898311411345 1 273 1 0.00625777245 0.3710024 1.4304995524008379 1 274 0 -1.73243809 0.0111099789 0.01611801356385726 0 -275 0 +275 0 ? ? ? 
0 276 0 -1.91610467 0.007339344 0.010627482294138626 0 277 0 -2.21277714 0.00374727719 0.0054163328257276827 0 278 0 -2.12083673 0.00461633364 0.0066753814217522627 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 3.0956676 0.9985136 0.002146047033326764 1 290 0 -2.30471754 0.00304132653 0.0043943924919565489 0 291 0 -2.12083673 0.00461633364 0.0066753814217522627 0 -292 1 +292 1 ? ? ? 0 293 1 1.9506762 0.980189741 0.028867048302480647 1 -294 0 +294 0 ? ? ? 0 295 1 2.57708764 0.995172262 0.0069818201865888847 1 296 0 0.6443205 0.7161847 1.8169756066019795 1 -297 0 +297 0 ? ? ? 0 298 0 -1.00866961 0.0552023575 0.081922729978655878 0 299 1 2.32091022 0.9913795 0.012490669010981763 1 300 1 2.40809751 0.992921352 0.010248646031556275 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 1.19807649 0.899084449 0.15347146394878203 1 313 0 -2.30471754 0.00304132653 0.0043943924919565489 0 314 0 -2.26844478 0.00330244377 0.0047723037303054865 0 -315 0 +315 0 ? ? ? 0 316 1 1.47730184 0.943915665 0.083270128208594502 1 317 1 3.18132329 0.998776734 0.0017658802629188005 1 318 0 -1.9538343 0.00673895236 0.0097551599741469968 0 319 0 0.68000555 0.732414246 1.9019267823886503 1 320 1 2.448116 0.993534148 0.009358541174187586 1 -321 0 +321 0 ? ? ? 0 322 0 -1.82416427 0.009033906 0.01309239897866022 0 323 1 1.739836 0.9683619 0.046381755881038968 1 324 0 -2.12083673 0.00461633364 0.0066753814217522627 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -1.41561711 0.022598628 0.032976965091470092 0 409 0 -1.79705584 0.009603847 0.013922383925987339 0 410 0 -2.21277714 0.00374727719 0.0054163328257276827 0 -411 0 +411 0 ? ? ? 
0 412 1 3.177976 0.9987674 0.0017793974994358322 1 413 0 -1.40844309 0.0229624342 0.033514061939899156 0 414 1 2.38908362 0.992610335 0.010700618410080708 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -2.1201086 0.00462396163 0.0066864373761273176 0 615 0 -1.529671 0.01751844 0.025497765300806271 0 616 0 -1.91610467 0.007339344 0.010627482294138626 0 -617 0 +617 0 ? ? ? 0 618 0 -1.76776826 0.010259659 0.014878012048023576 0 619 0 -1.61943209 0.0143251875 0.020816334525491278 0 620 0 -1.91610467 0.007339344 0.010627482294138626 0 diff --git a/test/BaselineOutput/SingleRelease/Command/CommandCrossValidationKeyLabelWithFloatKeyValues-out.txt b/test/BaselineOutput/SingleRelease/Command/CommandCrossValidationKeyLabelWithFloatKeyValues-out.txt index 3608e165c9..338a27b79c 100644 --- a/test/BaselineOutput/SingleRelease/Command/CommandCrossValidationKeyLabelWithFloatKeyValues-out.txt +++ b/test/BaselineOutput/SingleRelease/Command/CommandCrossValidationKeyLabelWithFloatKeyValues-out.txt @@ -38,6 +38,8 @@ DCG@2: 0.000000 (0.0000) DCG@3: 0.000000 (0.0000) --------------------------------------- +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. 
The missing key value will be mapped to the default value of 'Text' Physical memory usage(MB): %Number% Virtual memory usage(MB): %Number% %DateTime% Time elapsed(s): %Number% diff --git a/test/BaselineOutput/SingleRelease/Command/Datatypes-1-out.txt b/test/BaselineOutput/SingleRelease/Command/Datatypes-1-out.txt new file mode 100644 index 0000000000..fe04f014c2 --- /dev/null +++ b/test/BaselineOutput/SingleRelease/Command/Datatypes-1-out.txt @@ -0,0 +1 @@ +Wrote 5 rows across 9 columns in %Time% diff --git a/test/BaselineOutput/SingleRelease/Command/Datatypes-2-out.txt b/test/BaselineOutput/SingleRelease/Command/Datatypes-2-out.txt new file mode 100644 index 0000000000..a2aaab4439 --- /dev/null +++ b/test/BaselineOutput/SingleRelease/Command/Datatypes-2-out.txt @@ -0,0 +1 @@ +Wrote 5 rows of length 9 diff --git a/test/BaselineOutput/SingleRelease/Command/Datatypes-datatypes.txt b/test/BaselineOutput/SingleRelease/Command/Datatypes-datatypes.txt index e7d128e400..aaf1a3cb2e 100644 --- a/test/BaselineOutput/SingleRelease/Command/Datatypes-datatypes.txt +++ b/test/BaselineOutput/SingleRelease/Command/Datatypes-datatypes.txt @@ -14,6 +14,6 @@ bl i1 i2 i4 i8 ts dto dt tx 0 127 32767 2147483647 9223372036854775807 "2.00:00:00" "2008-11-30T00:00:00.0000000+00:00" "2013-08-05T00:00:00.0000000" foo 1 -127 -32767 -2147483647 -9223372036854775807 "7.00:00:00" "2008-11-30T00:00:00.0000000+00:00" "2013-08-05T00:00:00.0000000" xyz - "7.00:00:00" "2008-11-30T00:00:00.0000000+00:00" "2013-08-05T00:00:00.0000000" +0 -128 -32768 -2147483648 -9223372036854775808 "7.00:00:00" "2008-11-30T00:00:00.0000000+00:00" "2013-08-05T00:00:00.0000000" "" 9 0:0 - +0 -128 -32768 -2147483648 -9223372036854775808 "00:00:00" "0001-01-01T00:00:00.0000000+00:00" "0001-01-01T00:00:00.0000000" "" diff --git a/test/BaselineOutput/SingleRelease/Command/Datatypes-out.txt b/test/BaselineOutput/SingleRelease/Command/Datatypes-out.txt new file mode 100644 index 0000000000..a2aaab4439 --- /dev/null +++ 
b/test/BaselineOutput/SingleRelease/Command/Datatypes-out.txt @@ -0,0 +1 @@ +Wrote 5 rows of length 9 diff --git a/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.key-out.txt b/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.key-out.txt index 193a84b3cd..f6c942f824 100644 --- a/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.key-out.txt +++ b/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.key-out.txt @@ -46,6 +46,12 @@ Log-loss: 0.253074 (0.0597) Log-loss reduction: 76.713844 (5.4729) --------------------------------------- +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' Physical memory usage(MB): %Number% Virtual memory usage(MB): %Number% %DateTime% Time elapsed(s): %Number% diff --git a/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt b/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt index e6f1c68e64..28327f69a4 100644 --- a/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt +++ b/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt @@ -50,6 +50,10 @@ Log-loss: 0.253074 (0.0597) Log-loss reduction: 76.713839 (5.4729) --------------------------------------- +Warning: There is no NA value for type 'Text'. 
The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' +Warning: There is no NA value for type 'Text'. The missing key value will be mapped to the default value of 'Text' Physical memory usage(MB): %Number% Virtual memory usage(MB): %Number% %DateTime% Time elapsed(s): %Number% diff --git a/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-CV-breast-cancer.PAVcalibration.txt b/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-CV-breast-cancer.PAVcalibration.txt index b337fd78da..b113f448bc 100644 --- a/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-CV-breast-cancer.PAVcalibration.txt +++ b/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-CV-breast-cancer.PAVcalibration.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 1.25826859 1 0 1 35 0 -1.41764426 1E-15 1.4415419267167138E-15 0 37 0 -0.835641861 1E-15 1.4415419267167138E-15 0 -40 0 +40 0 ? ? ? 0 41 1 0.227130175 0.8666667 0.20645086423799175 1 44 1 1.61423349 1 0 1 45 0 -1.47288513 1E-15 1.4415419267167138E-15 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -1.19960022 1E-15 1.4415419267167138E-15 0 141 0 -1.47188914 1E-15 1.4415419267167138E-15 0 144 0 -1.41764426 1E-15 1.4415419267167138E-15 0 -145 0 +145 0 ? ? ? 0 147 0 -1.334388 1E-15 1.4415419267167138E-15 0 150 0 -1.48176765 1E-15 1.4415419267167138E-15 0 151 1 0.7797704 1 0 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -1.54539037 1E-15 1.4415419267167138E-15 0 156 0 -1.35846376 1E-15 1.4415419267167138E-15 0 161 0 -1.1599288 1E-15 1.4415419267167138E-15 0 -164 0 +164 0 ? ? ? 
0 167 1 1.07981849 1 0 1 169 0 -1.5413084 1E-15 1.4415419267167138E-15 0 171 0 -1.43690062 1E-15 1.4415419267167138E-15 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 2.368261 1 0 1 247 1 0.4837153 0.8666667 0.20645086423799175 1 248 0 -0.934056163 1E-15 1.4415419267167138E-15 0 -249 0 +249 0 ? ? ? 0 250 0 -1.41270852 1E-15 1.4415419267167138E-15 0 252 0 0.706041336 0.8666667 2.9068906815998465 1 254 1 1.600352 1 0 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -1.43690062 1E-15 1.4415419267167138E-15 0 271 0 -1.1428957 1E-15 1.4415419267167138E-15 0 272 1 0.435359716 0.8666667 0.20645086423799175 1 -275 0 +275 0 ? ? ? 0 276 0 -1.34414315 1E-15 1.4415419267167138E-15 0 277 0 -1.49114561 1E-15 1.4415419267167138E-15 0 278 0 -1.43690062 1E-15 1.4415419267167138E-15 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -1.43690062 1E-15 1.4415419267167138E-15 0 293 1 0.9331081 1 0 1 296 0 0.221757889 0.8666667 2.9068906815998465 1 -297 0 +297 0 ? ? ? 0 299 1 1.13769388 1 0 1 300 1 1.24829745 1 0 1 301 0 -1.43690062 1E-15 1.4415419267167138E-15 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 0.7455783 0.9151356 0.12794252993534275 1 317 1 1.81665158 1 0 1 319 0 0.285441637 0.8666667 2.9068906815998465 1 -321 0 +321 0 ? ? ? 0 323 1 0.8990345 1 0 1 327 0 -1.49114561 1E-15 1.4415419267167138E-15 0 328 1 0.8467562 1 0 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 2.95104 1 0 1 613 0 -1.29436374 1E-15 1.4415419267167138E-15 0 614 0 -1.4625113 1E-15 1.4415419267167138E-15 0 -617 0 +617 0 ? ? ? 
0 618 0 -1.2706418 1E-15 1.4415419267167138E-15 0 619 0 -1.19714069 1E-15 1.4415419267167138E-15 0 621 0 -0.3866408 0.5799383 1.2513268180432666 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -1.13828516 1E-15 1.4415419267167138E-15 0 19 0 -0.9969288 1E-15 1.4415419267167138E-15 0 22 0 -1.259604 1E-15 1.4415419267167138E-15 0 -23 1 +23 1 ? ? ? 0 24 0 -1.35031986 1E-15 1.4415419267167138E-15 0 26 0 -1.22985053 1E-15 1.4415419267167138E-15 0 27 0 -1.11824775 1E-15 1.4415419267167138E-15 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -1.22317672 1E-15 1.4415419267167138E-15 0 135 0 -0.8295188 1E-15 1.4415419267167138E-15 0 136 0 -1.18892586 1E-15 1.4415419267167138E-15 0 -139 0 +139 0 ? ? ? 0 140 0 -1.3102448 1E-15 1.4415419267167138E-15 0 142 1 0.6019325 0.875 0.19264507794239591 1 143 0 -1.00445139 1E-15 1.4415419267167138E-15 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -1.01599169 1E-15 1.4415419267167138E-15 0 155 1 0.7134235 0.875 0.19264507794239591 1 157 0 -1.27964163 1E-15 1.4415419267167138E-15 0 -158 0 +158 0 ? ? ? 0 159 1 2.42447567 1 0 1 160 1 1.81060028 1 0 1 162 0 -1.20896339 1E-15 1.4415419267167138E-15 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 1.72919893 1 0 1 232 0 -0.038654685 0.6923077 1.7004398041324202 0 234 0 -0.661043048 1E-15 1.4415419267167138E-15 0 -235 0 +235 0 ? ? ? 0 236 1 2.13174057 1 0 1 238 1 2.40485334 1 0 1 243 0 -0.8018886 1E-15 1.4415419267167138E-15 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 2.9424572 1 0 1 287 0 -1.22317672 1E-15 1.4415419267167138E-15 0 289 1 1.71858239 1 0 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 
0 295 1 1.42958808 1 0 1 298 0 -0.7813908 1E-15 1.4415419267167138E-15 0 302 1 2.925787 1 0 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -1.35031986 1E-15 1.4415419267167138E-15 0 310 0 -1.293855 1E-15 1.4415419267167138E-15 0 313 0 -1.45160127 1E-15 1.4415419267167138E-15 0 -315 0 +315 0 ? ? ? 0 318 0 -1.24103785 1E-15 1.4415419267167138E-15 0 320 1 1.18153763 1 0 1 322 0 -1.20896339 1E-15 1.4415419267167138E-15 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -1.40096045 1E-15 1.4415419267167138E-15 0 408 0 -0.928058 1E-15 1.4415419267167138E-15 0 410 0 -1.40096045 1E-15 1.4415419267167138E-15 0 -411 0 +411 0 ? ? ? 0 412 1 1.731832 1 0 1 417 0 -1.40096045 1E-15 1.4415419267167138E-15 0 420 0 -0.7677182 1E-15 1.4415419267167138E-15 0 diff --git a/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-CV-breast-cancer.nocalibration.txt b/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-CV-breast-cancer.nocalibration.txt index fed143de3d..000020d071 100644 --- a/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-CV-breast-cancer.nocalibration.txt +++ b/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-CV-breast-cancer.nocalibration.txt @@ -14,7 +14,7 @@ Instance Label Score Assigned 32 1 1.25826859 1 35 0 -1.41764426 0 37 0 -0.835641861 0 -40 0 +40 0 ? 0 41 1 0.227130175 1 44 1 1.61423349 1 45 0 -1.47288513 0 @@ -76,7 +76,7 @@ Instance Label Score Assigned 138 0 -1.19960022 0 141 0 -1.47188914 0 144 0 -1.41764426 0 -145 0 +145 0 ? 0 147 0 -1.334388 0 150 0 -1.48176765 0 151 1 0.7797704 1 @@ -84,7 +84,7 @@ Instance Label Score Assigned 154 0 -1.54539037 0 156 0 -1.35846376 0 161 0 -1.1599288 0 -164 0 +164 0 ? 0 167 1 1.07981849 1 169 0 -1.5413084 0 171 0 -1.43690062 0 @@ -130,7 +130,7 @@ Instance Label Score Assigned 246 1 2.368261 1 247 1 0.4837153 1 248 0 -0.934056163 0 -249 0 +249 0 ? 
0 250 0 -1.41270852 0 252 0 0.706041336 1 254 1 1.600352 1 @@ -144,7 +144,7 @@ Instance Label Score Assigned 269 0 -1.43690062 0 271 0 -1.1428957 0 272 1 0.435359716 1 -275 0 +275 0 ? 0 276 0 -1.34414315 0 277 0 -1.49114561 0 278 0 -1.43690062 0 @@ -158,7 +158,7 @@ Instance Label Score Assigned 291 0 -1.43690062 0 293 1 0.9331081 1 296 0 0.221757889 1 -297 0 +297 0 ? 0 299 1 1.13769388 1 300 1 1.24829745 1 301 0 -1.43690062 0 @@ -172,7 +172,7 @@ Instance Label Score Assigned 316 1 0.7455783 1 317 1 1.81665158 1 319 0 0.285441637 1 -321 0 +321 0 ? 0 323 1 0.8990345 1 327 0 -1.49114561 0 328 1 0.8467562 1 @@ -318,7 +318,7 @@ Instance Label Score Assigned 612 1 2.95104 1 613 0 -1.29436374 0 614 0 -1.4625113 0 -617 0 +617 0 ? 0 618 0 -1.2706418 0 619 0 -1.19714069 0 621 0 -0.3866408 0 @@ -375,7 +375,7 @@ Instance Label Score Assigned 17 0 -1.13828516 0 19 0 -0.9969288 0 22 0 -1.259604 0 -23 1 +23 1 ? 0 24 0 -1.35031986 0 26 0 -1.22985053 0 27 0 -1.11824775 0 @@ -425,7 +425,7 @@ Instance Label Score Assigned 134 0 -1.22317672 0 135 0 -0.8295188 0 136 0 -1.18892586 0 -139 0 +139 0 ? 0 140 0 -1.3102448 0 142 1 0.6019325 1 143 0 -1.00445139 0 @@ -435,7 +435,7 @@ Instance Label Score Assigned 153 0 -1.01599169 0 155 1 0.7134235 1 157 0 -1.27964163 0 -158 0 +158 0 ? 0 159 1 2.42447567 1 160 1 1.81060028 1 162 0 -1.20896339 0 @@ -474,7 +474,7 @@ Instance Label Score Assigned 231 1 1.72919893 1 232 0 -0.038654685 0 234 0 -0.661043048 0 -235 0 +235 0 ? 0 236 1 2.13174057 1 238 1 2.40485334 1 243 0 -0.8018886 0 @@ -496,8 +496,8 @@ Instance Label Score Assigned 286 1 2.9424572 1 287 0 -1.22317672 0 289 1 1.71858239 1 -292 1 -294 0 +292 1 ? 0 +294 0 ? 0 295 1 1.42958808 1 298 0 -0.7813908 0 302 1 2.925787 1 @@ -506,7 +506,7 @@ Instance Label Score Assigned 307 0 -1.35031986 0 310 0 -1.293855 0 313 0 -1.45160127 0 -315 0 +315 0 ? 
0 318 0 -1.24103785 0 320 1 1.18153763 1 322 0 -1.20896339 0 @@ -551,7 +551,7 @@ Instance Label Score Assigned 407 0 -1.40096045 0 408 0 -0.928058 0 410 0 -1.40096045 0 -411 0 +411 0 ? 0 412 1 1.731832 1 417 0 -1.40096045 0 420 0 -0.7677182 0 diff --git a/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-TrainTest-breast-cancer.PAVcalibration.txt b/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-TrainTest-breast-cancer.PAVcalibration.txt index 9c673bc10a..905fbd46fd 100644 --- a/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-TrainTest-breast-cancer.PAVcalibration.txt +++ b/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-TrainTest-breast-cancer.PAVcalibration.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 1.133441 1 0 1 21 1 1.46698761 1 0 1 22 0 -1.29266214 1E-15 1.4415419267167138E-15 0 -23 1 +23 1 ? ? ? 0 24 0 -1.38898408 1E-15 1.4415419267167138E-15 0 25 1 0.2072978 0.8125 0.29956028185890782 1 26 0 -1.27053475 1E-15 1.4415419267167138E-15 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -0.7899848 1E-15 1.4415419267167138E-15 0 38 1 1.07285547 0.955555558 0.065588337627980248 1 39 1 0.444084644 0.826086938 0.27563447429444238 1 -40 0 +40 0 ? ? ? 0 41 1 0.278235674 0.8125 0.29956028185890782 1 42 1 1.82501435 1 0 1 43 1 0.0648527145 0.8125 0.29956028185890782 1 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -1.21864986 1E-15 1.4415419267167138E-15 0 137 0 -1.34436476 1E-15 1.4415419267167138E-15 0 138 0 -1.15066063 1E-15 1.4415419267167138E-15 0 -139 0 +139 0 ? ? ? 0 140 0 -1.34436476 1E-15 1.4415419267167138E-15 0 141 0 -1.418377 1E-15 1.4415419267167138E-15 0 142 1 0.605928659 0.826086938 0.27563447429444238 1 143 0 -1.04888356 1E-15 1.4415419267167138E-15 0 144 0 -1.36667442 1E-15 1.4415419267167138E-15 0 -145 0 +145 0 ? ? ? 
0 146 1 0.263422 0.8125 0.29956028185890782 1 147 0 -1.28818154 1E-15 1.4415419267167138E-15 0 148 0 -0.38141644 0.333333343 0.5849625222189877 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 0.7075844 0.826086938 0.27563447429444238 1 156 0 -1.30894148 1E-15 1.4415419267167138E-15 0 157 0 -1.3149718 1E-15 1.4415419267167138E-15 0 -158 0 +158 0 ? ? ? 0 159 1 2.41813564 1 0 1 160 1 1.80749393 1 0 1 161 0 -1.10798192 1E-15 1.4415419267167138E-15 0 162 0 -1.24095953 1E-15 1.4415419267167138E-15 0 163 0 -1.14838839 1E-15 1.4415419267167138E-15 0 -164 0 +164 0 ? ? ? 0 165 0 -0.9971055 1E-15 1.4415419267167138E-15 0 166 1 1.77878284 1 0 1 167 1 1.0986836 0.955555558 0.065588337627980248 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 -0.0348134041 0.727272749 1.8744692325712462 0 233 1 1.102045 1 0 1 234 0 -0.709003 0.1 0.15200309583369792 0 -235 0 +235 0 ? ? ? 0 236 1 2.13387632 1 0 1 237 1 1.230866 1 0 1 238 1 2.40226221 1 0 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 2.38478136 1 0 1 247 1 0.5292797 0.826086938 0.27563447429444238 1 248 0 -0.8721206 1E-15 1.4415419267167138E-15 0 -249 0 +249 0 ? ? ? 0 250 0 -1.3606441 1E-15 1.4415419267167138E-15 0 251 1 1.35184932 1 0 1 252 0 0.7461872 0.826086938 2.5235618055722013 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 0.451641083 0.826086938 0.27563447429444238 1 273 1 -0.184997916 0.727272749 0.45943157564163517 0 274 0 -1.1819942 1E-15 1.4415419267167138E-15 0 -275 0 +275 0 ? ? ? 0 276 0 -1.29266214 1E-15 1.4415419267167138E-15 0 277 0 -1.4406867 1E-15 1.4415419267167138E-15 0 278 0 -1.38898408 1E-15 1.4415419267167138E-15 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 1.698751 1 0 1 290 0 -1.49238932 1E-15 1.4415419267167138E-15 0 291 0 -1.38898408 1E-15 1.4415419267167138E-15 0 -292 1 +292 1 ? ? ? 
0 293 1 0.9705 0.955555558 0.065588337627980248 1 -294 0 +294 0 ? ? ? 0 295 1 1.44576406 1 0 1 296 0 0.264410973 0.8125 2.4150374992788439 1 -297 0 +297 0 ? ? ? 0 298 0 -0.785661459 1E-15 1.4415419267167138E-15 0 299 1 1.16638494 1 0 1 300 1 1.27287531 1 0 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.6419792 0.826086938 0.27563447429444238 1 313 0 -1.49238932 1E-15 1.4415419267167138E-15 0 314 0 -1.4823153 1E-15 1.4415419267167138E-15 0 -315 0 +315 0 ? ? ? 0 316 1 0.773257732 0.826086938 0.27563447429444238 1 317 1 1.83582067 1 0 1 318 0 -1.26409817 1E-15 1.4415419267167138E-15 0 319 0 0.303462982 0.8125 2.4150374992788439 1 320 1 1.16069293 1 0 1 -321 0 +321 0 ? ? ? 0 322 0 -1.24095953 1E-15 1.4415419267167138E-15 0 323 1 0.909108639 0.955555558 0.065588337627980248 1 324 0 -1.38898408 1E-15 1.4415419267167138E-15 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -0.9617897 1E-15 1.4415419267167138E-15 0 409 0 -1.22467291 1E-15 1.4415419267167138E-15 0 410 0 -1.4406867 1E-15 1.4415419267167138E-15 0 -411 0 +411 0 ? ? ? 0 412 1 1.77363062 1 0 1 413 0 -1.02494574 1E-15 1.4415419267167138E-15 0 414 1 1.26032782 1 0 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -1.408303 1E-15 1.4415419267167138E-15 0 615 0 -1.0826714 1E-15 1.4415419267167138E-15 0 616 0 -1.29266214 1E-15 1.4415419267167138E-15 0 -617 0 +617 0 ? ? ? 
0 618 0 -1.21864986 1E-15 1.4415419267167138E-15 0 619 0 -1.14463758 1E-15 1.4415419267167138E-15 0 620 0 -1.29266214 1E-15 1.4415419267167138E-15 0 diff --git a/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-TrainTest-breast-cancer.nocalibration.txt b/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-TrainTest-breast-cancer.nocalibration.txt index c5e41e5240..1ee365e6f7 100644 --- a/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-TrainTest-breast-cancer.nocalibration.txt +++ b/test/BaselineOutput/SingleRelease/LinearSVM/LinearSVM-TrainTest-breast-cancer.nocalibration.txt @@ -22,7 +22,7 @@ Instance Label Score Assigned 20 1 1.133441 1 21 1 1.46698761 1 22 0 -1.29266214 0 -23 1 +23 1 ? 0 24 0 -1.38898408 0 25 1 0.2072978 1 26 0 -1.27053475 0 @@ -39,7 +39,7 @@ Instance Label Score Assigned 37 0 -0.7899848 0 38 1 1.07285547 1 39 1 0.444084644 1 -40 0 +40 0 ? 0 41 1 0.278235674 1 42 1 1.82501435 1 43 1 0.0648527145 1 @@ -138,13 +138,13 @@ Instance Label Score Assigned 136 0 -1.21864986 0 137 0 -1.34436476 0 138 0 -1.15066063 0 -139 0 +139 0 ? 0 140 0 -1.34436476 0 141 0 -1.418377 0 142 1 0.605928659 1 143 0 -1.04888356 0 144 0 -1.36667442 0 -145 0 +145 0 ? 0 146 1 0.263422 1 147 0 -1.28818154 0 148 0 -0.38141644 0 @@ -157,13 +157,13 @@ Instance Label Score Assigned 155 1 0.7075844 1 156 0 -1.30894148 0 157 0 -1.3149718 0 -158 0 +158 0 ? 0 159 1 2.41813564 1 160 1 1.80749393 1 161 0 -1.10798192 0 162 0 -1.24095953 0 163 0 -1.14838839 0 -164 0 +164 0 ? 0 165 0 -0.9971055 0 166 1 1.77878284 1 167 1 1.0986836 1 @@ -234,7 +234,7 @@ Instance Label Score Assigned 232 0 -0.0348134041 0 233 1 1.102045 1 234 0 -0.709003 0 -235 0 +235 0 ? 0 236 1 2.13387632 1 237 1 1.230866 1 238 1 2.40226221 1 @@ -248,7 +248,7 @@ Instance Label Score Assigned 246 1 2.38478136 1 247 1 0.5292797 1 248 0 -0.8721206 0 -249 0 +249 0 ? 
0 250 0 -1.3606441 0 251 1 1.35184932 1 252 0 0.7461872 1 @@ -274,7 +274,7 @@ Instance Label Score Assigned 272 1 0.451641083 1 273 1 -0.184997916 0 274 0 -1.1819942 0 -275 0 +275 0 ? 0 276 0 -1.29266214 0 277 0 -1.4406867 0 278 0 -1.38898408 0 @@ -291,12 +291,12 @@ Instance Label Score Assigned 289 1 1.698751 1 290 0 -1.49238932 0 291 0 -1.38898408 0 -292 1 +292 1 ? 0 293 1 0.9705 1 -294 0 +294 0 ? 0 295 1 1.44576406 1 296 0 0.264410973 1 -297 0 +297 0 ? 0 298 0 -0.785661459 0 299 1 1.16638494 1 300 1 1.27287531 1 @@ -314,13 +314,13 @@ Instance Label Score Assigned 312 1 0.6419792 1 313 0 -1.49238932 0 314 0 -1.4823153 0 -315 0 +315 0 ? 0 316 1 0.773257732 1 317 1 1.83582067 1 318 0 -1.26409817 0 319 0 0.303462982 1 320 1 1.16069293 1 -321 0 +321 0 ? 0 322 0 -1.24095953 0 323 1 0.909108639 1 324 0 -1.38898408 0 @@ -410,7 +410,7 @@ Instance Label Score Assigned 408 0 -0.9617897 0 409 0 -1.22467291 0 410 0 -1.4406867 0 -411 0 +411 0 ? 0 412 1 1.77363062 1 413 0 -1.02494574 0 414 1 1.26032782 1 @@ -616,7 +616,7 @@ Instance Label Score Assigned 614 0 -1.408303 0 615 0 -1.0826714 0 616 0 -1.29266214 0 -617 0 +617 0 ? 0 618 0 -1.21864986 0 619 0 -1.14463758 0 620 0 -1.29266214 0 diff --git a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-CV-breast-cancer.txt b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-CV-breast-cancer.txt index 55744d3dbb..c3e87fdfec 100644 --- a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 7.01161575 0.999099433 0.0012998283148002473 1 35 0 -5.546275 0.0038867984 0.0056183906346792553 0 37 0 -0.627175331 0.3481513 0.61739094617065504 0 -40 0 +40 0 ? ? ? 
0 41 1 3.0365572 0.9541986 0.067638527006529184 1 44 1 6.61341858 0.998659551 0.0019351561965343684 1 45 0 -5.565003 0.00381495967 0.0055143486004666058 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -4.72209072 0.008818108 0.012778263407408707 0 141 0 -6.18090439 0.002064286 0.0029812133205916491 0 144 0 -5.546275 0.0038867984 0.0056183906346792553 0 -145 0 +145 0 ? ? ? 0 147 0 -5.305633 0.0049390397 0.0071431828757896592 0 150 0 -5.43029261 0.004362697 0.0063078105710409225 0 151 1 4.990402 0.993243039 0.0097813179599174817 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -6.58201027 0.00138314639 0.0019968397134892645 0 156 0 -5.6663394 0.00344857574 0.0049838416366178697 0 161 0 -3.992939 0.0181113519 0.026368671070599831 0 -164 0 +164 0 ? ? ? 0 167 1 6.79999542 0.9988875 0.0016059215653949079 1 169 0 -6.41288567 0.00163759827 0.0023644914709276374 0 171 0 -5.312752 0.004904177 0.007092637921430613 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 10.4554977 0.9999712 4.1534408054175585E-05 1 247 1 2.7647686 0.940742 0.088128954823708028 1 248 0 -3.52696 0.0285547972 0.041795476162409473 0 -249 0 +249 0 ? ? ? 0 250 0 -6.30096865 0.00183116761 0.0026442381917314273 0 252 0 4.46767044 0.988656163 6.4619475094968903 1 254 1 5.268241 0.994873762 0.007414618780919654 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -5.312752 0.004904177 0.007092637921430613 0 271 0 -3.70832729 0.02393173 0.034946036680259647 0 272 1 3.75608158 0.9771588 0.033335080773514915 1 -275 0 +275 0 ? ? ? 
0 276 0 -5.145169 0.005793728 0.0083828901690503332 0 277 0 -5.947381 0.002605866 0.0037643767357215252 0 278 0 -5.312752 0.004904177 0.007092637921430613 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -5.312752 0.004904177 0.007092637921430613 0 293 1 4.889887 0.992533863 0.0108117709104356 1 296 0 1.11456966 0.752980053 2.0173005524167049 1 -297 0 +297 0 ? ? ? 0 299 1 7.92421341 0.999638259 0.00052197576964742038 1 300 1 5.01128674 0.993381739 0.009579868932513079 1 301 0 -5.312752 0.004904177 0.007092637921430613 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 3.64061546 0.974434555 0.037362801100438832 1 317 1 8.889477 0.9998622 0.00019882564438936136 1 319 0 2.932189 0.9494149 4.3051439460642333 1 -321 0 +321 0 ? ? ? 0 323 1 5.11123276 0.994007468 0.0086714037349760222 1 327 0 -5.947381 0.002605866 0.0037643767357215252 0 328 1 3.24846554 0.962617934 0.054964793658751787 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 16.72465 0.99999994 8.5991327994145617E-08 1 613 0 -5.02813625 0.00650837226 0.0094202851039732072 0 614 0 -5.6638155 0.00345726055 0.0049964145907412149 0 -617 0 +617 0 ? ? ? 0 618 0 -4.744063 0.008628122 0.012501759455550775 0 619 0 -4.34295654 0.01283126 0.01863138517568386 0 621 0 0.373829842 0.5923841 1.2947177664921086 1 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -4.23449326 0.0142802689 0.020750590335179415 0 19 0 -3.10273981 0.04299438 0.063400697943788428 0 22 0 -5.130806 0.00587705 0.0085038042327941973 0 -23 1 +23 1 ? ? ? 
0 24 0 -5.932124 0.002645822 0.0038221729188529165 0 26 0 -5.0699935 0.006243238 0.0090353231046563232 0 27 0 -3.99905157 0.01800297 0.026209433910245255 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -5.12534046 0.00590906851 0.0085502708566916 0 135 0 -2.97098112 0.0487542 0.072109916649189282 0 136 0 -4.56492853 0.0103033585 0.014941712057931327 0 -139 0 +139 0 ? ? ? 0 140 0 -5.461241 0.00423030974 0.0061159922992663762 0 142 1 3.05590534 0.9550368 0.066371741280747978 1 143 0 -5.35564375 0.004699242 0.0067955523991724961 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -4.276688 0.0136983329 0.019899122722508662 0 155 1 1.77740765 0.8553765 0.22536855303918271 1 157 0 -5.366247 0.004649908 0.0067240441834850199 0 -158 0 +158 0 ? ? ? 0 159 1 8.96132851 0.999871731 0.0001850652016644095 1 160 1 6.856139 0.998948157 0.001518287560141836 1 162 0 -4.80037 0.008159574 0.011820066323398703 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.663686 0.9965423 0.0049970870434534837 1 232 0 0.928171158 0.71670413 1.8196185260804569 1 234 0 -3.391735 0.0325547643 0.047748098148187916 0 -235 0 +235 0 ? ? ? 0 236 1 7.88262749 0.9996229 0.00054416973071506056 1 238 1 8.651073 0.9998251 0.00025232061237670696 1 243 0 -4.03390265 0.0173970815 0.025319570708974903 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 10.9212322 0.99998194 2.6055606891255495E-05 1 287 0 -5.12534046 0.00590906851 0.0085502708566916 0 289 1 5.185871 0.9944361 0.0080494462952667781 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 
0 295 1 4.77450371 0.9916284 0.012128492162908918 1 298 0 -2.54918242 0.07248143 0.10855193009256793 0 302 1 11.1919193 0.999986231 1.9864132926342996E-05 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -5.932124 0.002645822 0.0038221729188529165 0 310 0 -5.691217 0.00336412713 0.0048615916252809817 0 313 0 -6.59299469 0.0013680571 0.0019750404849533288 0 -315 0 +315 0 ? ? ? 0 318 0 -5.91572762 0.00268944423 0.0038852748092412133 0 320 1 4.1229 0.9840607 0.023180779648038535 1 322 0 -4.80037 0.008159574 0.011820066323398703 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -6.26255941 0.00190273311 0.0027476785076986981 0 408 0 -4.386099 0.0122961234 0.017849522550602435 0 410 0 -6.26255941 0.00190273311 0.0027476785076986981 0 -411 0 +411 0 ? ? ? 0 412 1 7.037607 0.99912256 0.0012664339928697493 1 417 0 -6.26255941 0.00190273311 0.0027476785076986981 0 420 0 -3.338777 0.0342646 0.050300135586962377 0 diff --git a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-CV-breast-cancer.withThreshold.txt b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-CV-breast-cancer.withThreshold.txt index 0a4ce514a2..3b22d36b5d 100644 --- a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-CV-breast-cancer.withThreshold.txt +++ b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-CV-breast-cancer.withThreshold.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 7.01161575 0.999099433 0.0012998283148002473 1 35 0 -5.546275 0.0038867984 0.0056183906346792553 0 37 0 -0.627175331 0.3481513 0.61739094617065504 0 -40 0 +40 0 ? ? ? 
0 41 1 3.0365572 0.9541986 0.067638527006529184 1 44 1 6.61341858 0.998659551 0.0019351561965343684 1 45 0 -5.565003 0.00381495967 0.0055143486004666058 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -4.72209072 0.008818108 0.012778263407408707 0 141 0 -6.18090439 0.002064286 0.0029812133205916491 0 144 0 -5.546275 0.0038867984 0.0056183906346792553 0 -145 0 +145 0 ? ? ? 0 147 0 -5.305633 0.0049390397 0.0071431828757896592 0 150 0 -5.43029261 0.004362697 0.0063078105710409225 0 151 1 4.990402 0.993243039 0.0097813179599174817 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -6.58201027 0.00138314639 0.0019968397134892645 0 156 0 -5.6663394 0.00344857574 0.0049838416366178697 0 161 0 -3.992939 0.0181113519 0.026368671070599831 0 -164 0 +164 0 ? ? ? 0 167 1 6.79999542 0.9988875 0.0016059215653949079 1 169 0 -6.41288567 0.00163759827 0.0023644914709276374 0 171 0 -5.312752 0.004904177 0.007092637921430613 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 10.4554977 0.9999712 4.1534408054175585E-05 1 247 1 2.7647686 0.940742 0.088128954823708028 0 248 0 -3.52696 0.0285547972 0.041795476162409473 0 -249 0 +249 0 ? ? ? 0 250 0 -6.30096865 0.00183116761 0.0026442381917314273 0 252 0 4.46767044 0.988656163 6.4619475094968903 1 254 1 5.268241 0.994873762 0.007414618780919654 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -5.312752 0.004904177 0.007092637921430613 0 271 0 -3.70832729 0.02393173 0.034946036680259647 0 272 1 3.75608158 0.9771588 0.033335080773514915 1 -275 0 +275 0 ? ? ? 
0 276 0 -5.145169 0.005793728 0.0083828901690503332 0 277 0 -5.947381 0.002605866 0.0037643767357215252 0 278 0 -5.312752 0.004904177 0.007092637921430613 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -5.312752 0.004904177 0.007092637921430613 0 293 1 4.889887 0.992533863 0.0108117709104356 1 296 0 1.11456966 0.752980053 2.0173005524167049 0 -297 0 +297 0 ? ? ? 0 299 1 7.92421341 0.999638259 0.00052197576964742038 1 300 1 5.01128674 0.993381739 0.009579868932513079 1 301 0 -5.312752 0.004904177 0.007092637921430613 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 3.64061546 0.974434555 0.037362801100438832 1 317 1 8.889477 0.9998622 0.00019882564438936136 1 319 0 2.932189 0.9494149 4.3051439460642333 0 -321 0 +321 0 ? ? ? 0 323 1 5.11123276 0.994007468 0.0086714037349760222 1 327 0 -5.947381 0.002605866 0.0037643767357215252 0 328 1 3.24846554 0.962617934 0.054964793658751787 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 16.72465 0.99999994 8.5991327994145617E-08 1 613 0 -5.02813625 0.00650837226 0.0094202851039732072 0 614 0 -5.6638155 0.00345726055 0.0049964145907412149 0 -617 0 +617 0 ? ? ? 0 618 0 -4.744063 0.008628122 0.012501759455550775 0 619 0 -4.34295654 0.01283126 0.01863138517568386 0 621 0 0.373829842 0.5923841 1.2947177664921086 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -4.23449326 0.0142802689 0.020750590335179415 0 19 0 -3.10273981 0.04299438 0.063400697943788428 0 22 0 -5.130806 0.00587705 0.0085038042327941973 0 -23 1 +23 1 ? ? ? 
0 24 0 -5.932124 0.002645822 0.0038221729188529165 0 26 0 -5.0699935 0.006243238 0.0090353231046563232 0 27 0 -3.99905157 0.01800297 0.026209433910245255 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -5.12534046 0.00590906851 0.0085502708566916 0 135 0 -2.97098112 0.0487542 0.072109916649189282 0 136 0 -4.56492853 0.0103033585 0.014941712057931327 0 -139 0 +139 0 ? ? ? 0 140 0 -5.461241 0.00423030974 0.0061159922992663762 0 142 1 3.05590534 0.9550368 0.066371741280747978 1 143 0 -5.35564375 0.004699242 0.0067955523991724961 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -4.276688 0.0136983329 0.019899122722508662 0 155 1 1.77740765 0.8553765 0.22536855303918271 0 157 0 -5.366247 0.004649908 0.0067240441834850199 0 -158 0 +158 0 ? ? ? 0 159 1 8.96132851 0.999871731 0.0001850652016644095 1 160 1 6.856139 0.998948157 0.001518287560141836 1 162 0 -4.80037 0.008159574 0.011820066323398703 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.663686 0.9965423 0.0049970870434534837 1 232 0 0.928171158 0.71670413 1.8196185260804569 0 234 0 -3.391735 0.0325547643 0.047748098148187916 0 -235 0 +235 0 ? ? ? 0 236 1 7.88262749 0.9996229 0.00054416973071506056 1 238 1 8.651073 0.9998251 0.00025232061237670696 1 243 0 -4.03390265 0.0173970815 0.025319570708974903 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 10.9212322 0.99998194 2.6055606891255495E-05 1 287 0 -5.12534046 0.00590906851 0.0085502708566916 0 289 1 5.185871 0.9944361 0.0080494462952667781 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 
0 295 1 4.77450371 0.9916284 0.012128492162908918 1 298 0 -2.54918242 0.07248143 0.10855193009256793 0 302 1 11.1919193 0.999986231 1.9864132926342996E-05 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -5.932124 0.002645822 0.0038221729188529165 0 310 0 -5.691217 0.00336412713 0.0048615916252809817 0 313 0 -6.59299469 0.0013680571 0.0019750404849533288 0 -315 0 +315 0 ? ? ? 0 318 0 -5.91572762 0.00268944423 0.0038852748092412133 0 320 1 4.1229 0.9840607 0.023180779648038535 1 322 0 -4.80037 0.008159574 0.011820066323398703 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -6.26255941 0.00190273311 0.0027476785076986981 0 408 0 -4.386099 0.0122961234 0.017849522550602435 0 410 0 -6.26255941 0.00190273311 0.0027476785076986981 0 -411 0 +411 0 ? ? ? 0 412 1 7.037607 0.99912256 0.0012664339928697493 1 417 0 -6.26255941 0.00190273311 0.0027476785076986981 0 420 0 -3.338777 0.0342646 0.050300135586962377 0 diff --git a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-GaussianNorm-CV-breast-cancer.txt b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-GaussianNorm-CV-breast-cancer.txt index 2748bad72f..db7cabc84e 100644 --- a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-GaussianNorm-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-GaussianNorm-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 5.917115 0.9973143 0.003879895635620469 1 35 0 -5.023262 0.006539965 0.0094661631818091386 0 37 0 -1.38112783 0.200827926 0.3234219246434899 0 -40 0 +40 0 ? ? ? 
0 41 1 1.99977684 0.880773664 0.18315676412764836 1 44 1 6.1460495 0.997862637 0.0030868629883528767 1 45 0 -4.92853165 0.00718512246 0.010403360173725132 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -4.17745543 0.0151058007 0.021959341229968121 0 141 0 -5.50801945 0.00403775927 0.0058370475132605874 0 144 0 -5.023262 0.006539965 0.0094661631818091386 0 -145 0 +145 0 ? ? ? 0 147 0 -4.81479263 0.00804367848 0.011651498579728541 0 150 0 -4.92451668 0.00721382024 0.01044506254471955 0 151 1 4.048602 0.9828524 0.024953319944645107 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -5.80309772 0.00300910859 0.0043477707884351756 0 156 0 -4.933721 0.00714819832 0.010349705365439381 0 161 0 -3.81854677 0.0214878246 0.031338292831631254 0 -164 0 +164 0 ? ? ? 0 167 1 7.07203674 0.999152243 0.0012235733412714462 1 169 0 -5.652446 0.00349665456 0.0050534464360921993 0 171 0 -4.83358335 0.007895125 0.011435459622816726 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 9.218597 0.9999008 0.00014309666195095306 1 247 1 1.9463377 0.87504673 0.19256803177214576 1 248 0 -3.2993412 0.0355937965 0.052287163877458576 0 -249 0 +249 0 ? ? ? 0 250 0 -5.418478 0.004414317 0.006382611353481173 0 252 0 3.48067 0.970132768 5.0652926577610451 1 254 1 4.863985 0.9923395 0.011094325676462969 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -4.83358335 0.007895125 0.011435459622816726 0 271 0 -3.65327024 0.0252520833 0.036898928715096181 0 272 1 3.02218533 0.9535664 0.068594734962942897 1 -275 0 +275 0 ? ? ? 
0 276 0 -4.72818375 0.008765012 0.012700983189760973 0 277 0 -5.3183403 0.00487697963 0.00705320751084503 0 278 0 -4.83358335 0.007895125 0.011435459622816726 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -4.83358335 0.007895125 0.011435459622816726 0 293 1 3.6722765 0.975211561 0.036212866280089365 1 296 0 0.9506779 0.7212515 1.8429639897619017 1 -297 0 +297 0 ? ? ? 0 299 1 5.993658 0.9975117 0.0035943536619517991 1 300 1 5.41301346 0.9955616 0.0064175103553020287 1 301 0 -4.83358335 0.007895125 0.011435459622816726 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 2.857479 0.945704 0.080539421519246923 1 317 1 7.483817 0.9994382 0.00081069597738820146 1 319 0 2.14308548 0.895020843 3.2518251711052208 1 -321 0 +321 0 ? ? ? 0 323 1 4.38443756 0.987683654 0.017879061305842645 1 327 0 -5.3183403 0.00487697963 0.00705320751084503 0 328 1 1.9153614 0.8716203 0.19822830198625993 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 15.0148582 0.9999997 4.2995669122556443E-07 1 613 0 -4.76532364 0.008448151 0.012239880753341762 0 614 0 -5.114196 0.005974896 0.00864580721752186 0 -617 0 +617 0 ? ? ? 0 618 0 -4.43310547 0.0117381271 0.017034712310540854 0 619 0 -4.138027 0.0157037526 0.022835500585175472 0 621 0 -0.147964478 0.463076234 0.89721082981393785 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -4.094967 0.0163834114 0.023832028779250634 0 19 0 -3.158533 0.0407563634 0.06003080556474015 0 22 0 -4.786547 0.008272208 0.011983909171361835 0 -23 1 +23 1 ? ? ? 0 24 0 -5.49961758 0.00407168828 0.0058861960152521283 0 26 0 -4.637971 0.009584561 0.013894290826829943 0 27 0 -3.85011339 0.02083403 0.030374676365783088 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -4.786547 0.008272208 0.011983909171361835 0 135 0 -2.91646814 0.0513454638 0.07604528657651706 0 136 0 -4.31833 0.01314697 0.019092852440504127 0 -139 0 +139 0 ? ? ? 
0 140 0 -5.00991058 0.006627286 0.009592975357514695 0 142 1 2.566639 0.9286834 0.10674124733849408 1 143 0 -4.68576 0.009141385 0.013248879501449318 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -3.91140127 0.0196197983 0.028586744893093978 0 155 1 1.73489761 0.8500378 0.23440107518588552 1 157 0 -5.03140068 0.00648729829 0.0093896829482070059 0 -158 0 +158 0 ? ? ? 0 159 1 8.209714 0.9997281 0.00039234577165867646 1 160 1 6.214038 0.9980028 0.0028841924162386562 1 162 0 -4.563184 0.0103211654 0.014967669647872791 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.128908 0.994111836 0.0085199331359509835 1 232 0 0.387944221 0.5957877 1.3068148861794051 1 234 0 -3.05071926 0.04518643 0.066709024779774861 0 -235 0 +235 0 ? ? ? 0 236 1 7.45407867 0.999421239 0.00083521748895634131 1 238 1 8.351737 0.9997641 0.00034039381967173406 1 243 0 -3.79230738 0.02204652 0.032162254419967784 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 10.2351351 0.9999641 5.1767706679585828E-05 1 287 0 -4.786547 0.008272208 0.011983909171361835 0 289 1 5.007719 0.993358254 0.0096139756423385195 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 4.08445454 0.9834463 0.024081816959951673 1 298 0 -2.520249 0.07445079 0.11161839829444895 0 302 1 10.2925406 0.999966145 4.8843899665517181E-05 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -5.49961758 0.00407168828 0.0058861960152521283 0 310 0 -5.254764 0.00519544445 0.0075149807235167431 0 313 0 -5.94634438 0.00260856142 0.0037682756571934027 0 -315 0 +315 0 ? ? ? 
0 318 0 -5.49961758 0.00407168828 0.0058861960152521283 0 320 1 3.84560966 0.9790739 0.030510363432469476 1 322 0 -4.563184 0.0103211654 0.014967669647872791 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -5.722981 0.003259291 0.00470984242704696 0 408 0 -3.98740244 0.0182100739 0.026513731287650329 0 410 0 -5.722981 0.003259291 0.00470984242704696 0 -411 0 +411 0 ? ? ? 0 412 1 5.972435 0.997458458 0.0036713375244963246 1 417 0 -5.722981 0.003259291 0.00470984242704696 0 420 0 -3.14838743 0.0411548652 0.060630273661156198 0 diff --git a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-GaussianNorm-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-GaussianNorm-TrainTest-breast-cancer.txt index c81880f594..3e137abda1 100644 --- a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-GaussianNorm-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-GaussianNorm-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 5.678712 0.9965937 0.0049226209615764287 1 21 1 6.001464 0.997531 0.0035664232425213858 1 22 0 -5.02218437 0.00654697046 0.009476336418019371 0 -23 1 +23 1 ? ? ? 0 24 0 -5.557753 0.00384261133 0.0055543948499928858 0 25 1 0.7356682 0.676047862 0.56480270753052697 1 26 0 -5.01491547 0.006594418 0.0095452416271748757 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.86437464 0.134193972 0.20788424941725006 0 38 1 4.26738262 0.986175358 0.020083890425044392 1 39 1 0.948868752 0.7208876 0.47215375855428471 1 -40 0 +40 0 ? ? ? 
0 41 1 2.21941185 0.9019792 0.14883391736209772 1 42 1 6.171118 0.997915447 0.00301051350343931 1 43 1 -0.406847954 0.399668157 1.3231254618912542 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -4.573848 0.0102128033 0.014809714349895977 0 137 0 -5.38329029 0.00457168929 0.0066106759896893642 0 138 0 -4.32377768 0.0130764758 0.018989799023276886 0 -139 0 +139 0 ? ? ? 0 140 0 -5.38329029 0.00457168929 0.0066106759896893642 0 141 0 -5.83162737 0.00292472052 0.0042256622529479159 0 142 1 3.1618576 0.9593734 0.059835633747694031 1 143 0 -4.83655453 0.007871887 0.011401667353256314 0 144 0 -5.470522 0.004191393 0.00605960993587892 0 -145 0 +145 0 ? ? ? 0 146 1 0.0557060242 0.5139229 0.96037607240274647 1 147 0 -5.47995234 0.00415221555 0.0060028519951750492 0 148 0 -1.59599066 0.168542728 0.26628596824729922 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.20912266 0.901065767 0.15029568586304221 1 156 0 -5.58346558 0.003745433 0.0054136621278108544 0 157 0 -5.109416 0.006003351 0.0086871066895572498 0 -158 0 +158 0 ? ? ? 0 159 1 9.879018 0.99994874 7.3954435339176224E-05 1 160 1 6.966673 0.9990581 0.0013594752512588889 1 161 0 -4.01959 0.0176434461 0.025681337989317106 0 162 0 -4.661079 0.009367671 0.01357839101244592 0 163 0 -3.39304066 0.03251367 0.04768681882905737 0 -164 0 +164 0 ? ? ? 0 165 0 -3.65873766 0.0251178537 0.036700273114887771 0 166 1 6.106388 0.9977764 0.0032115643460085509 1 167 1 6.96164227 0.999053359 0.0013663610592643267 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.434880257 0.607038438 1.3475398955993996 1 233 1 4.62145329 0.9902574 0.014124543390739288 1 234 0 -3.19655 0.0392957628 0.057835744186683809 0 -235 0 +235 0 ? ? ? 
0 236 1 9.488486 0.9999243 0.00010921311695177715 1 237 1 5.185233 0.994432569 0.0080545481788837064 1 238 1 10.3383093 0.999967635 4.6694045347237877E-05 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 8.706185 0.9998345 0.00023881767606976273 1 247 1 1.85991 0.865286469 0.20875025156266316 1 248 0 -3.24698353 0.03743543 0.055044774760243595 0 -249 0 +249 0 ? ? ? 0 250 0 -5.9445715 0.00261317822 0.0037749537255417041 0 251 1 6.851535 0.998943269 0.001525346290754842 1 252 0 3.15871048 0.959250569 4.6170762873605469 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.54124355 0.8236454 0.27990471757503477 1 273 1 -0.485281944 0.381005645 1.3921157227274092 0 274 0 -4.467927 0.0113409776 0.016455057773140165 0 -275 0 +275 0 ? ? ? 0 276 0 -5.02218437 0.00654697046 0.009476336418019371 0 277 0 -5.918859 0.00268105837 0.0038731439944547637 0 278 0 -5.557753 0.00384261133 0.0055543948499928858 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 6.391654 0.9983273 0.0024151950596217639 1 290 0 -6.27996445 0.00186996383 0.0027003130775063206 0 291 0 -5.557753 0.00384261133 0.0055543948499928858 0 -292 1 +292 1 ? ? ? 0 293 1 4.062559 0.98308605 0.024610393646554929 1 -294 0 +294 0 ? ? ? 0 295 1 5.48979473 0.9958883 0.0059441683338054239 1 296 0 0.7984762 0.689648449 1.6880247409542737 1 -297 0 +297 0 ? ? ? 0 298 0 -2.82211971 0.05614051 0.083355987427050499 0 299 1 5.92269325 0.9973292 0.0038583400725341575 1 300 1 5.553643 0.9961416 0.0055772429838620297 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 2.505681 0.924539149 0.11319368429557292 1 313 0 -6.27996445 0.00186996383 0.0027003130775063206 0 314 0 -5.996913 0.00248024915 0.0035826879721537609 0 -315 0 +315 0 ? ? ? 
0 316 1 2.06765842 0.8877198 0.17182370265537225 1 317 1 6.799261 0.998886645 0.0016071267852858751 1 318 0 -5.323591 0.004851562 0.0070163583406618974 0 319 0 1.16352749 0.7619731 2.0708033667370818 1 320 1 5.47767448 0.995838344 0.006016528395062894 1 -321 0 +321 0 ? ? ? 0 322 0 -4.661079 0.009367671 0.01357839101244592 0 323 1 3.7019043 0.975917757 0.035168522105486828 1 324 0 -5.557753 0.00384261133 0.0055543948499928858 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -4.06660128 0.0168468487 0.024511924018010906 0 409 0 -4.77211475 0.008391453 0.012157388278315496 0 410 0 -5.918859 0.00268105837 0.0038731439944547637 0 -411 0 +411 0 ? ? ? 0 412 1 6.93106556 0.999024 0.0014087955764285343 1 413 0 -3.5143342 0.02890712 0.042318806680098388 0 414 1 4.808298 0.9919043 0.0117271336749766 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -5.548576 0.00387790077 0.0056055040477979955 0 615 0 -4.073707 0.0167295579 0.024339819893712551 0 616 0 -5.02218437 0.00654697046 0.009476336418019371 0 -617 0 +617 0 ? ? ? 0 618 0 -4.573848 0.0102128033 0.014809714349895977 0 619 0 -4.12551069 0.0158983991 0.023120824431946189 0 620 0 -5.02218437 0.00654697046 0.009476336418019371 0 diff --git a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.txt index 4789a49c72..51deeac562 100644 --- a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 7.041827 0.999126256 0.0012610978584151201 1 21 1 7.16006565 0.9992236 0.0011205580676841706 1 22 0 -5.39027166 0.00454002852 0.0065647900946516503 0 -23 1 +23 1 ? ? ? 
0 24 0 -6.16573143 0.00209578 0.003026744365055557 0 25 1 1.140585 0.757787049 0.4001356116356401 1 26 0 -5.39192247 0.00453257374 0.006553986122637143 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.74917936 0.148150727 0.23132991391158644 0 38 1 5.25044537 0.9947822 0.0075473881419006704 1 39 1 1.14784718 0.7591175 0.39760491464649406 1 -40 0 +40 0 ? ? ? 0 41 1 2.83779049 0.9446841 0.082096136496848479 1 42 1 5.84602737 0.997117 0.0041653216557672994 1 43 1 -0.115566254 0.471140563 1.0857705467412089 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -4.81180668 0.008067538 0.011686200181083801 0 137 0 -5.771742 0.003104659 0.0044860436134326774 0 138 0 -4.708121 0.008941052 0.01295722303877558 0 -139 0 +139 0 ? ? ? 0 140 0 -5.771742 0.003104659 0.0044860436134326774 0 141 0 -6.35020638 0.00174334215 0.0025173059685722476 0 142 1 3.59143162 0.9731803 0.039220987499558525 1 143 0 -5.32383156 0.00485040154 0.0070146760362167319 0 144 0 -5.96873665 0.00255094632 0.0036849396417204128 0 -145 0 +145 0 ? ? ? 0 146 1 -0.228344917 0.443160534 1.1740986883981319 0 147 0 -6.02899933 0.00240211817 0.0034696929544304877 0 148 0 -1.17236042 0.236428589 0.38916500660996345 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.561039 0.928311646 0.10731887720885433 1 156 0 -6.32817459 0.0017821081 0.0025733321754180501 0 157 0 -5.587267 0.00373127544 0.0053931605303660906 0 -158 0 +158 0 ? ? ? 0 159 1 9.957122 0.9999526 6.836472348564471E-05 1 160 1 7.81574535 0.999596834 0.00058176260649806227 1 161 0 -4.194911 0.0148482891 0.02158218175164682 0 162 0 -5.008802 0.00663458835 0.0096035809636014526 0 163 0 -4.450941 0.0115330191 0.016735320470494343 0 -164 0 +164 0 ? ? ? 
0 165 0 -3.79370117 0.022016488 0.0321179521831969 0 166 1 6.78795338 0.998874 0.0016253773809814112 1 167 1 6.650817 0.9987087 0.0018641198790525675 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.440856934 0.6084632 1.3527802863415537 1 233 1 5.59169 0.996285141 0.0053693888063565016 1 234 0 -3.380312 0.0329164639 0.048287580681296273 0 -235 0 +235 0 ? ? ? 0 236 1 9.42349148 0.9999192 0.00011660894974341421 1 237 1 5.527053 0.9960381 0.005727196256675989 1 238 1 10.3295126 0.999967337 4.7124015954602722E-05 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 9.522981 0.999926865 0.00010551521477366078 1 247 1 2.22113419 0.9021314 0.14859054441592434 1 248 0 -3.25320721 0.037211813 0.0547096542674552 0 -249 0 +249 0 ? ? ? 0 250 0 -6.70964432 0.00121761323 0.0017577148937104486 0 251 1 8.275113 0.9997453 0.00036748773339945429 1 252 0 3.75820923 0.97720623 5.4552166396980901 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.25216866 0.777675033 0.36276067331990891 1 273 1 -0.4770708 0.382944047 1.3847944817499098 0 274 0 -4.773376 0.008380964 0.01214212852075712 0 -275 0 +275 0 ? ? ? 0 276 0 -5.39027166 0.00454002852 0.0065647900946516503 0 277 0 -6.547201 0.00143207016 0.0020675212884715419 0 278 0 -6.16573143 0.00209578 0.003026744365055557 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 7.07017231 0.999150634 0.0012258970788924348 1 290 0 -6.928671 0.0009783435 0.0014121422633917285 0 291 0 -6.16573143 0.00209578 0.003026744365055557 0 -292 1 +292 1 ? ? ? 0 293 1 5.206727 0.9945503 0.0078837745923848179 1 -294 0 +294 0 ? ? ? 0 295 1 6.0751133 0.9977059 0.0033135223937081622 1 296 0 1.04628563 0.740061 1.9437549070362801 1 -297 0 +297 0 ? ? ? 
0 298 0 -2.41310167 0.08217907 0.12371538777024385 0 299 1 7.396323 0.999386847 0.00088486407096525718 1 300 1 5.69218445 0.996639132 0.0048568731086720204 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 3.90498447 0.9802564 0.02876897031816629 1 313 0 -6.928671 0.0009783435 0.0014121422633917285 0 314 0 -6.664193 0.00127416 0.0018393963949252371 0 -315 0 +315 0 ? ? ? 0 316 1 2.20020485 0.9002679 0.15157371666802275 1 317 1 7.75578 0.9995719 0.00061772192605728435 1 318 0 -5.81475639 0.002974334 0.0042974510254079716 0 319 0 1.38163567 0.7992536 2.3165538563686012 1 320 1 5.837037 0.997091 0.0042029227668842466 1 -321 0 +321 0 ? ? ? 0 322 0 -5.008802 0.00663458835 0.0096035809636014526 0 323 1 4.26100159 0.9860881 0.020211552173447847 1 324 0 -6.16573143 0.00209578 0.003026744365055557 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -4.4899044 0.0110971872 0.016099351806882618 0 409 0 -5.286586 0.00503353868 0.0072801992795113032 0 410 0 -6.547201 0.00143207016 0.0020675212884715419 0 -411 0 +411 0 ? ? ? 0 412 1 7.575248 0.9994873 0.00073988707471124668 1 413 0 -3.748186 0.0230181254 0.033596298028071243 0 414 1 5.274519 0.994905651 0.0073683771133106393 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -6.085728 0.00226994278 0.0032785576690230068 0 615 0 -4.604435 0.0099082 0.01436579907268937 0 616 0 -5.39027166 0.00454002852 0.0065647900946516503 0 -617 0 +617 0 ? ? ? 
0 618 0 -4.81180668 0.008067538 0.011686200181083801 0 619 0 -4.233342 0.0142964814 0.020774319020048612 0 620 0 -5.39027166 0.00454002852 0.0065647900946516503 0 diff --git a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.withThreshold.txt b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.withThreshold.txt index 92afd941f0..a1bd6936bc 100644 --- a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.withThreshold.txt +++ b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-TrainTest-breast-cancer.withThreshold.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 7.041827 0.999126256 0.0012610978584151201 1 21 1 7.16006565 0.9992236 0.0011205580676841706 1 22 0 -5.39027166 0.00454002852 0.0065647900946516503 0 -23 1 +23 1 ? ? ? 0 24 0 -6.16573143 0.00209578 0.003026744365055557 0 25 1 1.140585 0.757787049 0.4001356116356401 0 26 0 -5.39192247 0.00453257374 0.006553986122637143 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.74917936 0.148150727 0.23132991391158644 0 38 1 5.25044537 0.9947822 0.0075473881419006704 1 39 1 1.14784718 0.7591175 0.39760491464649406 0 -40 0 +40 0 ? ? ? 0 41 1 2.83779049 0.9446841 0.082096136496848479 0 42 1 5.84602737 0.997117 0.0041653216557672994 1 43 1 -0.115566254 0.471140563 1.0857705467412089 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -4.81180668 0.008067538 0.011686200181083801 0 137 0 -5.771742 0.003104659 0.0044860436134326774 0 138 0 -4.708121 0.008941052 0.01295722303877558 0 -139 0 +139 0 ? ? ? 
0 140 0 -5.771742 0.003104659 0.0044860436134326774 0 141 0 -6.35020638 0.00174334215 0.0025173059685722476 0 142 1 3.59143162 0.9731803 0.039220987499558525 1 143 0 -5.32383156 0.00485040154 0.0070146760362167319 0 144 0 -5.96873665 0.00255094632 0.0036849396417204128 0 -145 0 +145 0 ? ? ? 0 146 1 -0.228344917 0.443160534 1.1740986883981319 0 147 0 -6.02899933 0.00240211817 0.0034696929544304877 0 148 0 -1.17236042 0.236428589 0.38916500660996345 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.561039 0.928311646 0.10731887720885433 0 156 0 -6.32817459 0.0017821081 0.0025733321754180501 0 157 0 -5.587267 0.00373127544 0.0053931605303660906 0 -158 0 +158 0 ? ? ? 0 159 1 9.957122 0.9999526 6.836472348564471E-05 1 160 1 7.81574535 0.999596834 0.00058176260649806227 1 161 0 -4.194911 0.0148482891 0.02158218175164682 0 162 0 -5.008802 0.00663458835 0.0096035809636014526 0 163 0 -4.450941 0.0115330191 0.016735320470494343 0 -164 0 +164 0 ? ? ? 0 165 0 -3.79370117 0.022016488 0.0321179521831969 0 166 1 6.78795338 0.998874 0.0016253773809814112 1 167 1 6.650817 0.9987087 0.0018641198790525675 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.440856934 0.6084632 1.3527802863415537 0 233 1 5.59169 0.996285141 0.0053693888063565016 1 234 0 -3.380312 0.0329164639 0.048287580681296273 0 -235 0 +235 0 ? ? ? 0 236 1 9.42349148 0.9999192 0.00011660894974341421 1 237 1 5.527053 0.9960381 0.005727196256675989 1 238 1 10.3295126 0.999967337 4.7124015954602722E-05 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 9.522981 0.999926865 0.00010551521477366078 1 247 1 2.22113419 0.9021314 0.14859054441592434 0 248 0 -3.25320721 0.037211813 0.0547096542674552 0 -249 0 +249 0 ? ? ? 
0 250 0 -6.70964432 0.00121761323 0.0017577148937104486 0 251 1 8.275113 0.9997453 0.00036748773339945429 1 252 0 3.75820923 0.97720623 5.4552166396980901 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.25216866 0.777675033 0.36276067331990891 0 273 1 -0.4770708 0.382944047 1.3847944817499098 0 274 0 -4.773376 0.008380964 0.01214212852075712 0 -275 0 +275 0 ? ? ? 0 276 0 -5.39027166 0.00454002852 0.0065647900946516503 0 277 0 -6.547201 0.00143207016 0.0020675212884715419 0 278 0 -6.16573143 0.00209578 0.003026744365055557 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 7.07017231 0.999150634 0.0012258970788924348 1 290 0 -6.928671 0.0009783435 0.0014121422633917285 0 291 0 -6.16573143 0.00209578 0.003026744365055557 0 -292 1 +292 1 ? ? ? 0 293 1 5.206727 0.9945503 0.0078837745923848179 1 -294 0 +294 0 ? ? ? 0 295 1 6.0751133 0.9977059 0.0033135223937081622 1 296 0 1.04628563 0.740061 1.9437549070362801 0 -297 0 +297 0 ? ? ? 0 298 0 -2.41310167 0.08217907 0.12371538777024385 0 299 1 7.396323 0.999386847 0.00088486407096525718 1 300 1 5.69218445 0.996639132 0.0048568731086720204 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 3.90498447 0.9802564 0.02876897031816629 1 313 0 -6.928671 0.0009783435 0.0014121422633917285 0 314 0 -6.664193 0.00127416 0.0018393963949252371 0 -315 0 +315 0 ? ? ? 0 316 1 2.20020485 0.9002679 0.15157371666802275 0 317 1 7.75578 0.9995719 0.00061772192605728435 1 318 0 -5.81475639 0.002974334 0.0042974510254079716 0 319 0 1.38163567 0.7992536 2.3165538563686012 0 320 1 5.837037 0.997091 0.0042029227668842466 1 -321 0 +321 0 ? ? ? 
0 322 0 -5.008802 0.00663458835 0.0096035809636014526 0 323 1 4.26100159 0.9860881 0.020211552173447847 1 324 0 -6.16573143 0.00209578 0.003026744365055557 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -4.4899044 0.0110971872 0.016099351806882618 0 409 0 -5.286586 0.00503353868 0.0072801992795113032 0 410 0 -6.547201 0.00143207016 0.0020675212884715419 0 -411 0 +411 0 ? ? ? 0 412 1 7.575248 0.9994873 0.00073988707471124668 1 413 0 -3.748186 0.0230181254 0.033596298028071243 0 414 1 5.274519 0.994905651 0.0073683771133106393 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -6.085728 0.00226994278 0.0032785576690230068 0 615 0 -4.604435 0.0099082 0.01436579907268937 0 616 0 -5.39027166 0.00454002852 0.0065647900946516503 0 -617 0 +617 0 ? ? ? 0 618 0 -4.81180668 0.008067538 0.011686200181083801 0 619 0 -4.233342 0.0142964814 0.020774319020048612 0 620 0 -5.39027166 0.00454002852 0.0065647900946516503 0 diff --git a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-bin-norm-CV-breast-cancer.txt b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-bin-norm-CV-breast-cancer.txt index 946584d7cb..6f9ac3e2b6 100644 --- a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-bin-norm-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-bin-norm-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 4.455554 0.98851943 0.016658771299360001 1 35 0 -5.20966339 0.005433825 0.0078607284616035548 0 37 0 -1.95550251 0.1239546 0.19092245979854006 0 -40 0 +40 0 ? ? ? 
0 41 1 1.363338 0.796301663 0.32861302447496599 1 44 1 2.93189621 0.949400842 0.074910765340690438 1 45 0 -5.4227767 0.00439546537 0.0063552935777515963 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -4.283581 0.01360552 0.019763368834071083 0 141 0 -5.903559 0.0027222808 0.0039327765009182425 0 144 0 -5.20966339 0.005433825 0.0078607284616035548 0 -145 0 +145 0 ? ? ? 0 147 0 -4.808229 0.008096219 0.011727915268260366 0 150 0 -4.955528 0.00699508563 0.010127237230146099 0 151 1 2.9794178 0.9516356 0.071518853245330763 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -6.343319 0.00175536959 0.0025346883174766728 0 156 0 -4.870145 0.00761383865 0.011026477645071437 0 161 0 -3.876839 0.0202957559 0.029581804866342248 0 -164 0 +164 0 ? ? ? 0 167 1 1.63665962 0.8370799 0.25656278900168672 1 169 0 -6.067718 0.00231109979 0.0033380709957833601 0 171 0 -4.955528 0.00699508563 0.010127237230146099 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 6.373105 0.9982961 0.0024603307167519865 1 247 1 1.08481979 0.747405 0.42003789409759523 1 248 0 -3.67766953 0.0246584155 0.036020526273661445 0 -249 0 +249 0 ? ? ? 0 250 0 -5.56404066 0.00381861837 0.0055196472135604522 0 252 0 2.67256546 0.935388267 3.9520600195019679 1 254 1 2.64337635 0.933601558 0.099121125939627047 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -4.955528 0.00699508563 0.010127237230146099 0 271 0 -3.63624859 0.0256744642 0.037524217400354012 0 272 1 2.26432514 0.905879 0.14260970234578763 1 -275 0 +275 0 ? ? ? 
0 276 0 -5.20966339 0.005433825 0.0078607284616035548 0 277 0 -5.6494236 0.003507201 0.0050687153327134771 0 278 0 -4.955528 0.00699508563 0.010127237230146099 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -4.955528 0.00699508563 0.010127237230146099 0 293 1 2.05892181 0.886846 0.17324448166666195 1 296 0 1.99554062 0.880328059 3.0628431682446857 1 -297 0 +297 0 ? ? ? 0 299 1 3.69336033 0.9757162 0.035466552037373179 1 300 1 4.02837372 0.9825081 0.025458759934045509 1 301 0 -4.955528 0.00699508563 0.010127237230146099 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 2.94207382 0.9498875 0.074171415588786135 1 317 1 4.312872 0.986782 0.019196674360521217 1 319 0 2.41989422 0.9183318 3.6140817927103961 1 -321 0 +321 0 ? ? ? 0 323 1 3.56970787 0.972607434 0.040070476517068486 1 327 0 -5.6494236 0.003507201 0.0050687153327134771 0 328 1 1.70264626 0.845880032 0.24147502978012392 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 7.478863 0.9994354 0.00081473984708772015 1 613 0 -4.91670847 0.00726995664 0.010526641032229434 0 614 0 -5.20966339 0.005433825 0.0078607284616035548 0 -617 0 +617 0 ? ? ? 0 618 0 -4.769904 0.008409867 0.01218417928555986 0 619 0 -4.330144 0.0129945707 0.018870074173474232 0 621 0 -0.284356117 0.429386139 0.80941330227154873 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -3.99180722 0.0181314889 0.026398258875503611 0 19 0 -3.49091744 0.0295717642 0.043306566793261343 0 22 0 -4.74231339 0.008643099 0.012523555687056959 0 -23 1 +23 1 ? ? ? 
0 24 0 -4.99358654 0.00673562335 0.0097503246472711684 0 26 0 -4.230923 0.01433061 0.020824270758417435 0 27 0 -3.74053383 0.0231908429 0.0338513700113664 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -4.74265528 0.00864017 0.012519293184591884 0 135 0 -2.40284228 0.08295622 0.12493748028384198 0 136 0 -4.24142361 0.0141830426 0.020608297294341209 0 -139 0 +139 0 ? ? ? 0 140 0 -4.99192953 0.006746718 0.0097664397610658042 0 142 1 1.883389 0.8679999 0.20423319963131603 1 143 0 -3.93245554 0.0192188919 0.027996904861385053 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -3.71085787 0.02387269 0.034858773809671437 0 155 1 2.1317234 0.8939485 0.1617363815870177 1 157 0 -4.492697 0.0110665858 0.016054708591916572 0 -158 0 +158 0 ? ? ? 0 159 1 7.019025 0.9991061 0.0012901886373564615 1 160 1 5.00210667 0.9933211 0.0096679074407147667 1 162 0 -4.492697 0.0110665858 0.016054708591916572 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 4.06135273 0.983066 0.02463978413547805 1 232 0 0.582720757 0.641693234 1.4807328100816015 1 234 0 -2.76217723 0.0594026 0.08835075047841523 0 -235 0 +235 0 ? ? ? 0 236 1 5.42756176 0.995625436 0.0063250060270779312 1 238 1 6.00247669 0.9975335 0.0035628026721853763 1 243 0 -3.18360639 0.039787326 0.058574116415727635 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 7.122633 0.999194 0.0011632435375058345 1 287 0 -4.74265528 0.00864017 0.012519293184591884 0 289 1 5.24414349 0.994749367 0.0075950186704868833 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 
0 295 1 3.85042763 0.979172349 0.030365276805999493 1 298 0 -2.97889614 0.0483884327 0.07155528637722855 0 302 1 7.28154945 0.999312341 0.00099242318488484949 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -4.99358654 0.00673562335 0.0097503246472711684 0 310 0 -4.74265528 0.00864017 0.012519293184591884 0 313 0 -5.49281931 0.004099349 0.005926265742136525 0 -315 0 +315 0 ? ? ? 0 318 0 -4.994271 0.006731047 0.0097436773674529319 0 320 1 3.98940563 0.9818257 0.026461150371251282 1 322 0 -4.492697 0.0110665858 0.016054708591916572 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -5.243203 0.00525554 0.0076021355607147146 0 408 0 -2.50673819 0.07538716 0.11307869372093513 0 410 0 -5.243203 0.00525554 0.0076021355607147146 0 -411 0 +411 0 ? ? ? 0 412 1 4.41673565 0.9880704 0.017314215991460075 1 417 0 -5.243203 0.00525554 0.0076021355607147146 0 420 0 -2.416037 0.08195794 0.12336783697816152 0 diff --git a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-bin-norm-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-bin-norm-TrainTest-breast-cancer.txt index fc9c09274b..27120c9ca1 100644 --- a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-bin-norm-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-bin-norm-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 3.60211229 0.973457634 0.038809901663734138 1 21 1 4.16481066 0.9847049 0.022236640568523543 1 22 0 -5.531984 0.003942524 0.0056991017626305011 0 -23 1 +23 1 ? ? ? 
0 24 0 -5.641634 0.00353453052 0.0051082826744376919 0 25 1 1.07475233 0.7454996 0.42372049536525624 1 26 0 -5.05423546 0.00634177 0.0091783743058498103 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -2.63470244 0.06693815 0.099955373797007113 0 38 1 3.11990023 0.9577062 0.062344933763709205 1 39 1 1.3332 0.791369438 0.33757674381727276 1 -40 0 +40 0 ? ? ? 0 41 1 1.76657629 0.8540314 0.22763900783247884 1 42 1 5.77468061 0.996904433 0.0044728861234643764 1 43 1 -0.160024166 0.4600791 1.1200461625954479 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -4.98228741 0.00681164 0.0098607417433267518 0 137 0 -5.97203064 0.00254257885 0.0036728371153443573 0 138 0 -4.3374877 0.0129007176 0.018732896643962105 0 -139 0 +139 0 ? ? ? 0 140 0 -5.97203064 0.00254257885 0.0036728371153443573 0 141 0 -5.97203064 0.00254257885 0.0036728371153443573 0 142 1 3.62155676 0.973955452 0.03807230821683618 1 143 0 -4.06941748 0.0168002676 0.024443571870852193 0 144 0 -5.531984 0.003942524 0.0056991017626305011 0 -145 0 +145 0 ? ? ? 0 146 1 0.845272541 0.69957453 0.51545032982981998 1 147 0 -5.264244 0.00514667667 0.0074442579843589417 0 148 0 -3.53539133 0.0283218417 0.041449555087460649 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.63197279 0.9328912 0.1002192761113103 1 156 0 -4.95913267 0.00697009 0.010090922390098837 0 157 0 -5.091937 0.006108559 0.0088398149066821179 0 -158 0 +158 0 ? ? ? 0 159 1 7.637605 0.9995183 0.00069514934090023593 1 160 1 5.102776 0.9939569 0.00874476585784471 1 161 0 -4.27338028 0.013743096 0.019964600611336557 0 162 0 -5.091937 0.006108559 0.0088398149066821179 0 163 0 -3.67683315 0.0246785376 0.036050290649489301 0 -164 0 +164 0 ? ? ? 
0 165 0 -3.45594358 0.0305921026 0.044824259180207618 0 166 1 4.88854 0.9925239 0.010826239558709188 1 167 1 2.59618044 0.930615366 0.10374308707957469 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.465872765 0.6144064 1.3748470144034199 1 233 1 3.621717 0.9739595 0.038066304453576054 1 234 0 -3.43166828 0.0313202776 0.045908353174351067 0 -235 0 +235 0 ? ? ? 0 236 1 6.647678 0.9987047 0.001869974861439897 1 237 1 4.42246 0.9881377 0.017215962974508822 1 238 1 6.44672251 0.9984168 0.0022859116128351314 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 6.46864176 0.998451054 0.0022363890442921863 1 247 1 1.5234189 0.8210414 0.28447311579312373 1 248 0 -4.00451946 0.0179065578 0.026067797200325149 0 -249 0 +249 0 ? ? ? 0 250 0 -5.39917946 0.00449994765 0.0065067030783201637 0 251 1 4.104112 0.983763337 0.023616805142920666 1 252 0 2.75052214 0.939942837 4.0575198580425882 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.62500715 0.8354845 0.2593150232737797 1 273 1 0.228233814 0.556812048 0.84473766721050514 1 274 0 -4.82307673 0.007977848 0.011555758238394069 0 -275 0 +275 0 ? ? ? 0 276 0 -5.531984 0.003942524 0.0056991017626305011 0 277 0 -6.0816803 0.00227912888 0.003291840622802274 0 278 0 -5.641634 0.00353453052 0.0051082826744376919 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 5.68027353 0.996598959 0.004915027880645002 1 290 0 -6.521727 0.00146896509 0.0021208267355545722 0 291 0 -5.641634 0.00353453052 0.0051082826744376919 0 -292 1 +292 1 ? ? ? 0 293 1 2.365685 0.9141729 0.12946106222132911 1 -294 0 +294 0 ? ? ? 0 295 1 4.049787 0.982872367 0.024924010557578272 1 296 0 1.7836585 0.856148 2.7973428610745215 1 -297 0 +297 0 ? ? ? 
0 298 0 -3.845192 0.0209346656 0.030522958750931656 0 299 1 2.974937 0.95142895 0.071832170969213474 1 300 1 4.1471324 0.9844364 0.022630102350657457 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.437553883 0.607676 0.71862571139451958 1 313 0 -6.521727 0.00146896509 0.0021208267355545722 0 314 0 -6.0816803 0.00227912888 0.003291840622802274 0 -315 0 +315 0 ? ? ? 0 316 1 3.03096247 0.953953445 0.068009233653927595 1 317 1 3.97896051 0.9816384 0.026736450275361392 1 318 0 -5.641634 0.00353453052 0.0051082826744376919 0 319 0 1.61403608 0.833970964 2.5904925250318187 1 320 1 5.17757845 0.99439 0.0081162910482616441 1 -321 0 +321 0 ? ? ? 0 322 0 -5.091937 0.006108559 0.0088398149066821179 0 323 1 3.22697878 0.961837 0.056135679390163992 1 324 0 -5.641634 0.00353453052 0.0051082826744376919 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -2.996994 0.0475618578 0.070302697725435973 0 409 0 -4.887184 0.007486166 0.010840883662554117 0 410 0 -6.0816803 0.00227912888 0.003291840622802274 0 -411 0 +411 0 ? ? ? 0 412 1 4.5649085 0.989696443 0.014942001227255185 1 413 0 -3.34774446 0.0339691 0.049858759088118251 0 414 1 3.84952021 0.9791539 0.030392501392593196 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -5.531984 0.003942524 0.0056991017626305011 0 615 0 -3.69268847 0.02429977 0.035490127405006046 0 616 0 -5.531984 0.003942524 0.0056991017626305011 0 -617 0 +617 0 ? ? ? 
0 618 0 -4.98228741 0.00681164 0.0098607417433267518 0 619 0 -4.43259048 0.0117441025 0.01704343535923129 0 620 0 -5.531984 0.003942524 0.0056991017626305011 0 diff --git a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-non-negative-CV-breast-cancer.txt b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-non-negative-CV-breast-cancer.txt index c637c69d47..777b0c9e2e 100644 --- a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-non-negative-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-non-negative-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 7.291877 0.999319434 0.00098218321186686951 1 35 0 -5.869519 0.002816277 0.0040687610645090766 0 37 0 -1.02334213 0.2643769 0.44296132773823416 0 -40 0 +40 0 ? ? ? 0 41 1 2.78505516 0.9418629 0.086411051010362436 1 44 1 7.821086 0.999599 0.00057866567352766714 1 45 0 -5.776886 0.00308877858 0.0044630618032660361 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -4.724151 0.008800117 0.01275207813790242 0 141 0 -6.55827045 0.00141632778 0.0020447774455947432 0 144 0 -5.869519 0.002816277 0.0040687610645090766 0 -145 0 +145 0 ? ? ? 0 147 0 -5.710598 0.00329976762 0.0047684300667823433 0 150 0 -5.60911131 0.003650946 0.0052768404018184306 0 151 1 5.118675 0.994051635 0.0086073014434059885 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -6.9242 0.000982723 0.0014184666437026428 0 156 0 -5.82082272 0.00295639853 0.0042714987401086318 0 161 0 -4.27493 0.0137221068 0.019933897944770858 0 -164 0 +164 0 ? ? ? 
0 167 1 8.728978 0.9998382 0.00023348534112652949 1 169 0 -6.667038 0.00127054506 0.0018341745113224558 0 171 0 -5.546697 0.00388516486 0.0056160247410976614 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 11.3055124 0.9999877 1.7714321792245208E-05 1 247 1 2.306137 0.909384 0.1370384544406823 1 248 0 -3.57901382 0.027145749 0.039704412233602736 0 -249 0 +249 0 ? ? ? 0 250 0 -6.50957441 0.00148689921 0.0021467385111997687 0 252 0 4.227894 0.9856265 6.1204466632636887 1 254 1 5.1967 0.9944957 0.0079629764358839714 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -5.546697 0.00388516486 0.0056160247410976614 0 271 0 -4.08297968 0.01657771 0.024117038815588843 0 272 1 4.140359 0.984332263 0.022782712303270784 1 -275 0 +275 0 ? ? ? 0 276 0 -5.50359 0.004055611 0.0058629065586406487 0 277 0 -6.235449 0.00195492059 0.002823114622841595 0 278 0 -5.546697 0.00388516486 0.0056160247410976614 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -5.546697 0.00388516486 0.0056160247410976614 0 293 1 4.409566 0.9879857 0.017437977320811163 1 296 0 1.50613689 0.818488 2.4618631795162846 1 -297 0 +297 0 ? ? ? 0 299 1 7.93633366 0.9996426 0.00051569614495754321 1 300 1 7.85100842 0.9996108 0.00056163266102194308 1 301 0 -5.546697 0.00388516486 0.0056160247410976614 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 3.280983 0.9637706 0.05323826093940856 1 317 1 9.331164 0.999911368 0.00012787476790525535 1 319 0 2.956067 0.9505495 4.3378705942158717 1 -321 0 +321 0 ? ? ? 0 323 1 5.71478939 0.996714 0.0047485078589307303 1 327 0 -6.235449 0.00195492059 0.002823114622841595 0 328 1 1.37423134 0.7980629 0.32542559954710693 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 18.5483551 1 0 1 613 0 -5.86728668 0.00282255374 0.0040778419569695535 0 614 0 -5.931934 0.00264632422 0.0038228993852335589 0 -617 0 +617 0 ? ? ? 
0 618 0 -5.1376605 0.00583713735 0.0084458830416492391 0 619 0 -4.771732 0.00839464 0.012162025043583415 0 621 0 0.3424616 0.5847883 1.2680810757859888 1 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -4.595036 0.01000083 0.014500778622910757 0 19 0 -3.32248163 0.03480794 0.051112046016137037 0 22 0 -5.58065033 0.00375595246 0.0054288957255828087 0 -23 1 +23 1 ? ? ? 0 24 0 -6.503868 0.00149539544 0.0021590142749859306 0 26 0 -5.548387 0.00387863023 0.0056065605311466102 0 27 0 -4.308096 0.0132804094 0.01928794203033625 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -5.58065033 0.00375595246 0.0054288957255828087 0 135 0 -3.21473217 0.038615074 0.056813911931726302 0 136 0 -4.944373 0.007072995 0.010240432848577178 0 -139 0 +139 0 ? ? ? 0 140 0 -5.929988 0.00265146513 0.0038303358354253489 0 142 1 3.36657238 0.966643333 0.048944424422816823 1 143 0 -5.92445755 0.0026661302 0.0038515494632620751 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -4.654668 0.009427353 0.013665310686569912 0 155 1 1.85960007 0.865250349 0.20881047650865889 1 157 0 -5.867591 0.00282169762 0.0040766033438554457 0 -158 0 +158 0 ? ? ? 0 159 1 9.591543 0.9999317 9.8549424786689596E-05 1 160 1 7.363619 0.9993665 0.00091420540184684017 1 162 0 -5.231313 0.00531806657 0.0076928219174680146 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.973879 0.9974621 0.0036660786976757281 1 232 0 1.10679436 0.751531 2.008862256839882 1 234 0 -3.7641573 0.0226616841 0.033070042201646284 0 -235 0 +235 0 ? ? ? 
0 236 1 8.480452 0.9997926 0.00029928085261473974 1 238 1 9.448584 0.9999212 0.00011368501131740873 1 243 0 -4.52710867 0.0106962454 0.015514542524251164 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 11.9692574 0.9999937 9.115109290810302E-06 1 287 0 -5.58065033 0.00375595246 0.0054288957255828087 0 289 1 5.61943245 0.9963864 0.005222752234881816 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 5.155699 0.9942666 0.008295308421383105 1 298 0 -2.71633816 0.0620161332 0.092364986128816551 0 302 1 12.1483784 0.9999947 7.6532482629398447E-06 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -6.503868 0.00149539544 0.0021590142749859306 0 310 0 -6.216928 0.001991392 0.00287583593610438 0 313 0 -7.2025423 0.000744136 0.0010739609416730365 0 -315 0 +315 0 ? ? ? 0 318 0 -6.503868 0.00149539544 0.0021590142749859306 0 320 1 4.269491 0.9862041 0.02004186218454097 1 322 0 -5.231313 0.00531806657 0.0076928219174680146 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -6.853205 0.00105495122 0.0015227762658880663 0 408 0 -4.852546 0.00774797052 0.011221486878031036 0 410 0 -6.853205 0.00105495122 0.0015227762658880663 0 -411 0 +411 0 ? ? ? 
0 412 1 7.587702 0.9994936 0.00073076734716613276 1 417 0 -6.853205 0.00105495122 0.0015227762658880663 0 420 0 -3.639381 0.0255962238 0.037408370576337828 0 diff --git a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-non-negative-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-non-negative-TrainTest-breast-cancer.txt index 3c3d96d974..157c16ab10 100644 --- a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-non-negative-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-non-negative-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 6.59719467 0.9986377 0.0019667577188335239 1 21 1 6.9093914 0.999002635 0.0014396108762453246 1 22 0 -5.50264263 0.00405943953 0.0058684526611357517 0 -23 1 +23 1 ? ? ? 0 24 0 -6.14459133 0.002140461 0.0030913421022840675 0 25 1 0.9981127 0.7306873 0.4526739227114121 1 26 0 -5.56536674 0.00381357735 0.0055123467049374769 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.79816341 0.142074779 0.22107619020053021 0 38 1 4.925646 0.9927943 0.010433298358924143 1 39 1 0.895828247 0.7100915 0.49392321501990555 1 -40 0 +40 0 ? ? ? 0 41 1 2.72234154 0.93833214 0.091829412491125792 1 42 1 6.45601654 0.998431444 0.0022647243580549464 1 43 1 -0.5776577 0.359471738 1.4760497444221736 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -4.965955 0.006923029 0.010022553277705419 0 137 0 -5.93407059 0.00264069065 0.0038147503192492542 0 138 0 -4.668453 0.009299485 0.013479092815885742 0 -139 0 +139 0 ? ? ? 0 140 0 -5.93407059 0.00264069065 0.0038147503192492542 0 141 0 -6.47075844 0.00154565845 0.0022316389032971582 0 142 1 3.64823437 0.9746237 0.037082819238846716 1 143 0 -5.466771 0.00420707744 0.0060823331671915537 0 144 0 -6.03933048 0.002377488 0.0034340739321673348 0 -145 0 +145 0 ? ? 
? 0 146 1 -0.110341072 0.4724427 1.0817887726099731 0 147 0 -6.11751556 0.002199078 0.0031760924132441036 0 148 0 -1.55709267 0.174064219 0.2758984827724662 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.36491013 0.9141121 0.12955701132443528 1 156 0 -6.267907 0.00189260417 0.002733037741461919 0 157 0 -5.607903 0.003655344 0.0052832085489667481 0 -158 0 +158 0 ? ? ? 0 159 1 10.9176846 0.9999819 2.614159977229895E-05 1 160 1 7.75989342 0.9995737 0.00061514108379339587 1 161 0 -4.309515 0.0132618267 0.019260772318565499 0 162 0 -5.07121468 0.006235666 0.0090243302579491296 0 163 0 -3.76092339 0.02273342 0.033175939048272665 0 -164 0 +164 0 ? ? ? 0 165 0 -3.924346 0.0193723515 0.028222656292276067 0 166 1 6.74228859 0.998821437 0.001701309224545778 1 167 1 7.94067574 0.99964416 0.0005134595729021004 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.7225275 0.673163354 1.6133583443719168 1 233 1 5.29774761 0.995022058 0.0071995859979441072 1 234 0 -3.554101 0.027811477 0.040691992083778071 0 -235 0 +235 0 ? ? ? 0 236 1 10.4206543 0.9999702 4.2996303413732479E-05 1 237 1 5.828807 0.997067034 0.0042375925496323883 1 238 1 11.3587952 0.9999883 1.6854398235588073E-05 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 9.599171 0.9999322 9.7775450197580527E-05 1 247 1 2.067935 0.887747347 0.17177895049852415 1 248 0 -3.4094696 0.0320008248 0.046922276673100262 0 -249 0 +249 0 ? ? ? 0 250 0 -6.699335 0.0012302153 0.0017759181240158319 0 251 1 7.9959507 0.9996633 0.00048584679587356108 1 252 0 3.6111412 0.9736899 5.2482402117884597 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.72366619 0.848600447 0.23684265601696217 1 273 1 -0.342484474 0.4152061 1.2681004399882052 0 274 0 -4.846204 0.007796883 0.011292605602753639 0 -275 0 +275 0 ? ? ? 
0 276 0 -5.50264263 0.00405943953 0.0058684526611357517 0 277 0 -6.57601929 0.0013914461 0.0020088303041534395 0 278 0 -6.14459133 0.002140461 0.0030913421022840675 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 7.101452 0.999176741 0.0011882013529343118 1 290 0 -7.007447 0.0009042981 0.0013052166206428517 0 291 0 -6.14459133 0.002140461 0.0030913421022840675 0 -292 1 +292 1 ? ? ? 0 293 1 4.6855 0.9908563 0.013252265463070151 1 -294 0 +294 0 ? ? ? 0 295 1 6.2168417 0.99800843 0.0028760930785605436 1 296 0 1.02090549 0.735148966 1.9167469555439196 1 -297 0 +297 0 ? ? ? 0 298 0 -3.02001572 0.0465297773 0.068740211727588499 0 299 1 7.09773827 0.9991737 0.0011925905306341051 1 300 1 6.64691544 0.998703659 0.001871438610749495 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 2.90730953 0.9482066 0.076726654530730437 1 313 0 -7.007447 0.0009042981 0.0013052166206428517 0 314 0 -6.649353 0.001293185 0.0018668790298154393 0 -315 0 +315 0 ? ? ? 0 316 1 2.12067223 0.892896235 0.16343556787762994 1 317 1 7.5715065 0.9994854 0.00074264021133653045 1 318 0 -5.92458725 0.00266578537 0.0038510506602204577 0 319 0 1.35382557 0.7947544 2.2845767018893883 1 320 1 5.988476 0.9974988 0.0036129742419820916 1 -321 0 +321 0 ? ? ? 0 322 0 -5.07121468 0.006235666 0.0090243302579491296 0 323 1 4.28225327 0.986376643 0.019789456912749203 1 324 0 -6.14459133 0.002140461 0.0030913421022840675 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -4.58592224 0.0100914678 0.014632868867702545 0 409 0 -5.205141 0.00545831956 0.0078962603748794508 0 410 0 -6.57601929 0.0013914461 0.0020088303041534395 0 -411 0 +411 0 ? ? ? 
0 412 1 7.70175171 0.9995482 0.00065196153701951257 1 413 0 -3.70033741 0.02411908 0.035222977147123087 0 414 1 5.41945839 0.99559 0.006376396588275309 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -6.112665 0.00220974674 0.003191518193469859 0 615 0 -4.37095165 0.012481451 0.018120247723782505 0 616 0 -5.50264263 0.00405943953 0.0058684526611357517 0 -617 0 +617 0 ? ? ? 0 618 0 -4.965955 0.006923029 0.010022553277705419 0 619 0 -4.429267 0.0117827384 0.017099838701332234 0 620 0 -5.50264263 0.00405943953 0.0058684526611357517 0 diff --git a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-norm-CV-breast-cancer.txt b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-norm-CV-breast-cancer.txt index 4a5221f10c..ca68c5efee 100644 --- a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-norm-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-norm-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 4.28022528 0.9863494 0.019829298264044417 1 35 0 -4.29419041 0.0134638669 0.019556202484947195 0 37 0 -1.95759869 0.123727158 0.19054794774570458 0 -40 0 +40 0 ? ? ? 0 41 1 1.34579754 0.7934417 0.33380384997661361 1 44 1 5.22175264 0.9946311 0.0077665361772382407 1 45 0 -4.55969143 0.0103568994 0.015019761369081966 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -3.50701833 0.0291131958 0.042624993467054707 0 141 0 -4.510108 0.0108776484 0.015779105577069866 0 144 0 -4.29419041 0.0134638669 0.019556202484947195 0 -145 0 +145 0 ? ? ? 
0 147 0 -4.118532 0.0160079524 0.023281438797448952 0 150 0 -4.43784046 0.0116833262 0.016954714516170224 0 151 1 2.86513376 0.9460957 0.079941963974142813 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -4.792855 0.008220622 0.011908867541972762 0 156 0 -4.152423 0.0154827805 0.022511655407603286 0 161 0 -3.361469 0.0335215963 0.049190599682179155 0 -164 0 +164 0 ? ? ? 0 167 1 3.00652266 0.9528679 0.069651835538374979 1 169 0 -4.76463461 0.008453925 0.012248282163630898 0 171 0 -4.36102 0.0126044592 0.01829996516606797 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 7.761687 0.9995744 0.00061410874818056485 1 247 1 2.24705172 0.904395938 0.14497358262585658 1 248 0 -2.75623083 0.05973572 0.088861781570551343 0 -249 0 +249 0 ? ? ? 0 250 0 -4.36834049 0.0125136767 0.018167327858314108 0 252 0 2.47272253 0.9222073 3.6842213333365441 1 254 1 5.30986643 0.9950817 0.0071130806432035459 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -4.36102 0.0126044592 0.01829996516606797 0 271 0 -3.23003125 0.0380511023 0.055967840193562365 0 272 1 1.97665262 0.878323853 0.18717511071531998 1 -275 0 +275 0 ? ? ? 0 276 0 -4.011443 0.0177852046 0.025889540399059593 0 277 0 -4.57693768 0.010181617 0.014764258553784819 0 278 0 -4.36102 0.0126044592 0.01829996516606797 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -4.36102 0.0126044592 0.01829996516606797 0 293 1 3.50341845 0.9707849 0.042776422691022505 1 296 0 0.854511261 0.701512635 1.7442582303419563 1 -297 0 +297 0 ? ? ? 0 299 1 4.02451658 0.9824417 0.025556263019683039 1 300 1 4.165831 0.9847203 0.02221411037855156 1 301 0 -4.36102 0.0126044592 0.01829996516606797 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 2.756329 0.940269768 0.088853362072482428 1 317 1 5.80636454 0.9970007 0.004333585623359653 1 319 0 1.0757966 0.7456977 1.975383454841898 1 -321 0 +321 0 ? ? ? 
0 323 1 3.082415 0.956161559 0.064673689924879513 1 327 0 -4.57693768 0.010181617 0.014764258553784819 0 328 1 2.805962 0.942997158 0.084674672619554123 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 9.154173 0.999894261 0.00015255667702522208 1 613 0 -3.88851619 0.0200648643 0.029241838018723456 0 614 0 -4.371011 0.0124807227 0.018119183737570529 0 -617 0 +617 0 ? ? ? 0 618 0 -3.72869563 0.0234605316 0.034249740786795546 0 619 0 -3.44594836 0.0308899172 0.045267541796380997 0 621 0 -0.782016754 0.3138854 0.54347850919806506 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -3.6590457 0.0251103118 0.036689112229595131 0 19 0 -2.927483 0.0508115776 0.075233591312011905 0 22 0 -4.14026976 0.0156691261 0.022784748886103094 0 -23 1 +23 1 ? ? ? 0 24 0 -4.75638962 0.008523319 0.012349253326730132 0 26 0 -3.93837214 0.0191076845 0.027833331950969452 0 27 0 -3.40870714 0.03202445 0.046957488715061987 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -4.14026976 0.0156691261 0.022784748886103094 0 135 0 -2.62711 0.06741392 0.1006911970043132 0 136 0 -3.77448845 0.0224339925 0.032733975062274864 0 -139 0 +139 0 ? ? ? 0 140 0 -4.255713 0.0139846308 0.020317960583175827 0 142 1 1.9862442 0.879345238 0.18549840430247841 1 143 0 -3.74331784 0.02312786 0.033758349764067219 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -3.267953 0.0366871059 0.053923618256310278 0 155 1 1.88906479 0.8686489 0.20315494639328185 1 157 0 -4.39060831 0.0122414771 0.017769705367455636 0 -158 0 +158 0 ? ? ? 
0 159 1 6.88715744 0.9989802 0.001471976257367614 1 160 1 5.57244873 0.996213257 0.0054734847881935346 1 162 0 -4.024827 0.0175529048 0.025548374564996635 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 4.531541 0.989350557 0.015446292097008558 1 232 0 0.251759052 0.562609434 1.1930059918223119 1 234 0 -2.51516485 0.0748018846 0.11216576762239737 0 -235 0 +235 0 ? ? ? 0 236 1 5.772835 0.9968987 0.0044811669482472481 1 238 1 6.65452766 0.9987135 0.0018572316949005939 1 243 0 -3.28154564 0.036209736 0.053208867556931244 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 8.09413 0.999694765 0.00044042879818134223 1 287 0 -4.14026976 0.0156691261 0.022784748886103094 0 289 1 4.30420876 0.9866685 0.019362604522295365 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 3.601099 0.973431468 0.038848681676236112 1 298 0 -2.22490883 0.09753586 0.14805849380018501 0 302 1 8.018049 0.9996706 0.00047526633912005399 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -4.75638962 0.008523319 0.012349253326730132 0 310 0 -4.506051 0.0109213842 0.015842898366274911 0 313 0 -4.987275 0.00677798 0.0098118479335557937 0 -315 0 +315 0 ? ? ? 0 318 0 -4.75638962 0.008523319 0.012349253326730132 0 320 1 2.81792259 0.943636656 0.08369663336324043 1 322 0 -4.024827 0.0175529048 0.025548374564996635 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -4.87183237 0.00760109862 0.011007956774122585 0 408 0 -3.31706572 0.03499035 0.051384728090851442 0 410 0 -4.87183237 0.00760109862 0.011007956774122585 0 -411 0 +411 0 ? ? ? 
0 412 1 5.23884 0.9947216 0.007635302704268032 1 417 0 -4.87183237 0.00760109862 0.011007956774122585 0 420 0 -2.70138979 0.0628913939 0.093711836328273818 0 diff --git a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-norm-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-norm-TrainTest-breast-cancer.txt index ff73fc5189..1f68afb149 100644 --- a/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-norm-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/LogisticRegression/LogisticRegression-norm-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 3.96267843 0.9813426 0.027171185979008489 1 21 1 5.265054 0.9948575 0.0074382155674881083 1 22 0 -4.51155567 0.0108620832 0.015756402974357524 0 -23 1 +23 1 ? ? ? 0 24 0 -5.107146 0.00601691334 0.0087067913827365544 0 25 1 0.6726794 0.6621029 0.59487269328676762 1 26 0 -4.36255264 0.0125854 0.018272117354929882 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -2.04001045 0.115065664 0.17635768683092809 0 38 1 3.6194644 0.973902345 0.038150977482211336 1 39 1 0.673018932 0.6621788 0.59470724065030856 1 -40 0 +40 0 ? ? ? 0 41 1 1.75375986 0.8524264 0.23035280375662126 1 42 1 5.60796976 0.9963449 0.0052828205055256471 1 43 1 -0.5184488 0.37321502 1.4219210475645763 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -4.10382557 0.0162412636 0.023623552162962799 0 137 0 -4.73142624 0.008736885 0.01266004645418743 0 138 0 -3.874074 0.0203508064 0.029662873497674963 0 -139 0 +139 0 ? ? ? 0 140 0 -4.73142624 0.008736885 0.01266004645418743 0 141 0 -5.13915634 0.00582846347 0.0084332958699167727 0 142 1 2.53462648 0.9265339 0.11008436686792368 1 143 0 -4.1287837 0.0158472732 0.02304587572398243 0 144 0 -4.919286 0.00725137955 0.010499643937654568 0 -145 0 +145 0 ? ? ? 
0 146 1 0.185849667 0.546329141 0.87215771785369722 1 147 0 -4.93812275 0.007117027 0.010304411526052911 0 148 0 -2.08798218 0.110270388 0.16856112656874939 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.24833822 0.9045071 0.14479626651911345 1 156 0 -4.913893 0.007290303 0.010556209568305783 0 157 0 -4.69941568 0.009018519 0.013069997580773615 0 -158 0 +158 0 ? ? ? 0 159 1 8.213823 0.9997292 0.0003907114930137318 1 160 1 6.409885 0.9983575 0.0023716112041259128 1 161 0 -3.70981741 0.0238969475 0.034894625912887001 0 162 0 -4.29168558 0.0134971775 0.019604916230060426 0 163 0 -4.20999336 0.0146292737 0.021261482597556794 0 -164 0 +164 0 ? ? ? 0 165 0 -3.35529184 0.0337223038 0.049490233625727506 0 166 1 5.13335562 0.9941378 0.0084822193431077515 1 167 1 4.31503153 0.986810148 0.019155543364578753 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.387140751 0.5955942 1.3061245037640643 1 233 1 4.22282743 0.9855546 0.020992330945643928 1 234 0 -2.79013348 0.0578596778 0.085986144354393876 0 -235 0 +235 0 ? ? ? 0 236 1 7.198673 0.999253 0.0010781320275837761 1 237 1 4.24964952 0.9859315 0.020440656601130012 1 238 1 7.93097258 0.9996407 0.00051844885378563097 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 8.043116 0.9996788 0.00046348169408102081 1 247 1 2.170814 0.8975978 0.15585897141376154 1 248 0 -3.01326466 0.0468302034 0.069194857755858583 0 -249 0 +249 0 ? ? ? 0 250 0 -5.133764 0.005859794 0.0084787621145538308 0 251 1 5.4693675 0.9958038 0.0060666126630314994 1 252 0 2.521253 0.925618351 3.7489094481346643 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.26803923 0.7804069 0.35770157547115156 1 273 1 -0.341918468 0.415343523 1.2676230380461255 0 274 0 -4.11754751 0.01602347 0.023304190486054636 0 -275 0 +275 0 ? ? ? 
0 276 0 -4.51155567 0.0108620832 0.015756402974357524 0 277 0 -5.327016 0.00483505568 0.0069924288920724343 0 278 0 -5.107146 0.00601691334 0.0087067913827365544 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 5.13687468 0.9941583 0.0084524642023233402 1 290 0 -5.54688644 0.00388443237 0.005614963867042919 0 291 0 -5.107146 0.00601691334 0.0087067913827365544 0 -292 1 +292 1 ? ? ? 0 293 1 3.96790457 0.981438041 0.027030903251222888 1 -294 0 +294 0 ? ? ? 0 295 1 4.55162954 0.9895601 0.015140724489837425 1 296 0 0.6700988 0.661525249 1.5628798789895393 1 -297 0 +297 0 ? ? ? 0 298 0 -2.40630245 0.08269336 0.12452401321321592 0 299 1 4.497334 0.988984048 0.015980843395464565 1 300 1 4.62145948 0.990257442 0.014124456553393392 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 1.84912252 0.864024043 0.21085663625525078 1 313 0 -5.54688644 0.00388443237 0.005614963867042919 0 314 0 -5.36138344 0.004672473 0.0067567510712633252 0 -315 0 +315 0 ? ? ? 0 316 1 2.20764112 0.900933564 0.15050737158003294 1 317 1 5.890983 0.997243345 0.0039825045312741787 1 318 0 -5.00404263 0.006666029 0.0096492437155483984 0 319 0 0.6469283 0.656317949 1.540853588056835 1 320 1 3.99139547 0.9818612 0.026409039376297374 1 -321 0 +321 0 ? ? ? 0 322 0 -4.29168558 0.0134971775 0.019604916230060426 0 323 1 3.157802 0.959215045 0.060073807756361522 1 324 0 -5.107146 0.00601691334 0.0087067913827365544 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -3.57900143 0.0271460768 0.039704898382857935 0 409 0 -4.281804 0.0136293843 0.019798272915656116 0 410 0 -5.327016 0.00483505568 0.0069924288920724343 0 -411 0 +411 0 ? ? ? 
0 412 1 6.193191 0.997960865 0.0029448527311883681 1 413 0 -3.24647379 0.0374538042 0.055072312735252281 0 414 1 4.55400229 0.9895846 0.015105009633995839 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -4.95365334 0.00700811762 0.010146170993954725 0 615 0 -3.64432216 0.0254732724 0.037226341208279841 0 616 0 -4.51155567 0.0108620832 0.015756402974357524 0 -617 0 +617 0 ? ? ? 0 618 0 -4.10382557 0.0162412636 0.023623552162962799 0 619 0 -3.696096 0.0242191125 0.035370869193417386 0 620 0 -4.51155567 0.0108620832 0.015756402974357524 0 diff --git a/test/BaselineOutput/SingleRelease/NAReplace/featurized.tsv b/test/BaselineOutput/SingleRelease/NAReplace/featurized.tsv index f9ba7bf9f4..5ca9707e3e 100644 --- a/test/BaselineOutput/SingleRelease/NAReplace/featurized.tsv +++ b/test/BaselineOutput/SingleRelease/NAReplace/featurized.tsv @@ -1,14 +1,13 @@ #@ TextLoader{ #@ header+ #@ sep=tab -#@ col=A:TX:0 -#@ col=B:R4:1 -#@ col=C:R8:2 -#@ col=D:TX:3-6 -#@ col=E:R4:7-10 +#@ col=A:R4:0 +#@ col=B:R8:1 +#@ col=C:R4:2-5 +#@ col=D:R8:6-9 #@ } -A B C 8 0:"" -5 5 5 5 1 1 1 5 1 1 1 -5 5 5 5 4 4 5 5 4 4 5 -3 3 3 3 1 1 1 3 1 1 1 -6 6 6 6 8 8 1 6 8 8 1 +A B 8 0:"" +5 5 5 1 1 1 5 1 1 1 +5 5 5 4 4 5 5 4 4 5 +3 3 3 1 1 1 3 1 1 1 +6 6 6 8 8 1 6 8 8 1 diff --git a/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-CV-breast-cancer.txt b/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-CV-breast-cancer.txt index 7cfbe77b4a..cb14fcc5fe 100644 --- a/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 0.8960438 0.7101358 0.49383312022081494 1 35 0 -1.47149158 0.186716 0.298168872554196 0 37 0 -1.077555 0.253969 0.42269254099397019 0 -40 0 +40 0 ? ? ? 
0 41 1 -0.110265851 0.472461432 1.0817315302478872 0 44 1 1.38623238 0.799990058 0.32194602419012386 1 45 0 -1.49285364 0.1834938 0.29246424185161274 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -1.329715 0.2092065 0.33862709419515019 0 141 0 -1.51333463 0.18044512 0.28708753442168577 0 144 0 -1.47149158 0.186716 0.298168872554196 0 -145 0 +145 0 ? ? ? 0 147 0 -1.36272907 0.203797117 0.3287919993163349 0 150 0 -1.490515 0.183844447 0.29308395009459498 0 151 1 0.488354921 0.6197188 0.69031438298058734 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -1.54631257 0.175619483 0.27861768553552435 0 156 0 -1.34795034 0.206205666 0.33316283039965772 0 161 0 -1.29642129 0.214767918 0.34880897708810421 0 -164 0 +164 0 ? ? ? 0 167 1 0.7817354 0.686054 0.54360597630084673 1 169 0 -1.52040219 0.1794023 0.28525299602441972 0 171 0 -1.46262646 0.188065946 0.3005655398637459 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 2.30008841 0.908884346 0.13783136852928218 1 247 1 0.283135176 0.5703147 0.81016986229628651 1 248 0 -1.02733588 0.263600916 0.44144026210768955 0 -249 0 +249 0 ? ? ? 0 250 0 -1.3897934 0.199440747 0.32091990872905618 0 252 0 0.575369835 0.6400013 1.4739363860396681 1 254 1 1.36448383 0.796487451 0.32827646395944027 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -1.46262646 0.188065946 0.3005655398637459 0 271 0 -1.330715 0.209041134 0.33832542505991492 0 272 1 0.3417368 0.58461237 0.77444774011663531 1 -275 0 +275 0 ? ? ? 0 276 0 -1.43851376 0.1917756 0.30717219734355156 0 277 0 -1.50446939 0.181759879 0.28940381583950159 0 278 0 -1.46262646 0.188065946 0.3005655398637459 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -1.46262646 0.188065946 0.3005655398637459 0 293 1 0.690094 0.665987849 0.58643223892348229 1 296 0 0.122398376 0.530561447 1.090991766168943 1 -297 0 +297 0 ? ? ? 
0 299 1 0.8090725 0.6919118 0.53133991521179469 1 300 1 1.07804394 0.7461236 0.42251343016986392 1 301 0 -1.46262646 0.188065946 0.3005655398637459 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 0.633879662 0.653368652 0.61403085786632716 1 317 1 1.75457263 0.852528632 0.2301798078479787 1 319 0 0.1361382 0.5339821 1.1015427183713855 1 -321 0 +321 0 ? ? ? 0 323 1 0.8411846 0.6987147 0.51722465664834216 1 327 0 -1.50446939 0.181759879 0.28940381583950159 0 328 1 0.758497 0.681027353 0.55421535102487163 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 2.80018759 0.942685962 0.085150850944065576 1 613 0 -1.31009483 0.212470978 0.34459500515903269 0 614 0 -1.49938011 0.18251799 0.29074111357003468 0 -617 0 +617 0 ? ? ? 0 618 0 -1.40553582 0.196939126 0.31641874253214702 0 619 0 -1.37255788 0.2022069 0.32591343959007929 0 621 0 -0.609831333 0.35209766 0.62615172722986678 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -1.29125416 0.2156406 0.35041324245427652 0 19 0 -1.18682528 0.233827218 0.38425832019246531 0 22 0 -1.38527489 0.200163171 0.32222238184143043 0 -23 1 +23 1 ? ? ? 0 24 0 -1.44789767 0.19032532 0.30458573193898042 0 26 0 -1.321662 0.210541919 0.341065431407162 0 27 0 -1.28084588 0.217406273 0.35366454853946033 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -1.36420059 0.203558445 0.3283595980104454 0 135 0 -0.9835708 0.272183836 0.45835400169661483 0 136 0 -1.33306038 0.2086536 0.33761874206668624 0 -139 0 +139 0 ? ? ? 0 140 0 -1.42708111 0.1935539 0.31034997353186378 0 142 1 0.5200758 0.6271655 0.67308190344279184 1 143 0 -1.07713079 0.254049361 0.42284792669686094 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -1.147677 0.240913659 0.39766410362601251 0 155 1 0.775262833 0.6846582 0.54654409786019487 1 157 0 -1.39568317 0.198502019 0.31922920878555094 0 -158 0 +158 0 ? ? ? 
0 159 1 2.4702127 0.922027051 0.11711901622998303 1 160 1 1.8089292 0.8592324 0.21887965615084526 1 162 0 -1.34346867 0.206940219 0.33449847387485032 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 1.80702519 0.859001935 0.21926671447669044 1 232 0 -0.09305465 0.476753116 0.93443627885443414 0 234 0 -0.7166654 0.3281277 0.57374106872220532 0 -235 0 +235 0 ? ? ? 0 236 1 2.0308764 0.884000957 0.1778801634200064 1 238 1 2.5700233 0.9289072 0.10639359546534205 1 243 0 -0.9099059 0.2870191 0.48806467358561068 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 3.03633 0.954188645 0.067653576940859037 1 287 0 -1.36420059 0.203558445 0.3283595980104454 0 289 1 1.839293 0.8628651 0.21279308434601185 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 1.50403929 0.81817615 0.28951661133253503 1 298 0 -1.01809835 0.265398 0.44496526256029006 0 302 1 2.95740485 0.9506123 0.07307101604489151 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -1.44789767 0.19032532 0.30458573193898042 0 310 0 -1.4164151 0.1952242 0.31334116395889644 0 313 0 -1.53151011 0.17777285 0.28239108395025142 0 -315 0 +315 0 ? ? ? 0 318 0 -1.38467479 0.200259253 0.32239570012101715 0 320 1 1.03225207 0.737352252 0.43957409861257096 1 322 0 -1.34346867 0.206940219 0.33449847387485032 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -1.48970389 0.183966175 0.29329914054619283 0 408 0 -0.969954848 0.2748895 0.46372722780050113 0 410 0 -1.48970389 0.183966175 0.29329914054619283 0 -411 0 +411 0 ? ? ? 
0 412 1 1.784699 0.8562761 0.22385204529486596 1 417 0 -1.48970389 0.183966175 0.29329914054619283 0 420 0 -0.827743053 0.3041225 0.52309474947909951 0 diff --git a/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-L1-CV-breast-cancer.txt b/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-L1-CV-breast-cancer.txt index 0302f143a8..4462030630 100644 --- a/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-L1-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-L1-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 5.83416653 0.997082651 0.0042149967260249437 1 35 0 -5.20735168 0.005446332 0.0078788712300762211 0 37 0 -2.71164179 0.0622898862 0.092786101770516077 0 -40 0 +40 0 ? ? ? 0 41 1 1.76036549 0.853255451 0.22895036937850363 1 44 1 7.40955639 0.999394953 0.00087316212305162816 1 45 0 -5.421812 0.004399689 0.0063614137834035649 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -4.322978 0.0130867995 0.019004890409612719 0 141 0 -5.4411335 0.00431586 0.0062399450406013428 0 144 0 -5.20735168 0.005446332 0.0078788712300762211 0 -145 0 +145 0 ? ? ? 0 147 0 -4.875398 0.00757424766 0.010968922848962245 0 150 0 -5.39204836 0.00453200564 0.006553162786217457 0 151 1 4.061223 0.9830638 0.024643020624724253 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -5.73325 0.00322609954 0.0046618015694471927 0 156 0 -4.91463 0.007284973 0.010548463613035559 0 161 0 -4.12887526 0.0158458445 0.023043781432081138 0 -164 0 +164 0 ? ? ? 0 167 1 4.796111 0.9918059 0.011870271192039138 1 169 0 -5.68914 0.00337109854 0.0048716832359580091 0 171 0 -5.265687 0.005139294 0.0074335520922891698 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 10.8528 0.9999806 2.7947451458241805E-05 1 247 1 3.03624249 0.95418483 0.06765934462197544 1 248 0 -3.08702755 0.04364554 0.064382659498109923 0 -249 0 +249 0 ? ? ? 
0 250 0 -5.14841127 0.00577508053 0.0083558311280958943 0 252 0 3.84936237 0.979150653 5.5838539825714451 1 254 1 7.35057163 0.9993582 0.00092625186884288718 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -5.265687 0.005139294 0.0074335520922891698 0 271 0 -4.097221 0.016347127 0.023778810630558714 0 272 1 2.79984665 0.942667544 0.085179038039461821 1 -275 0 +275 0 ? ? ? 0 276 0 -4.915235 0.007280598 0.010542105680653375 0 277 0 -5.49946833 0.00407229364 0.0058870729354622087 0 278 0 -5.265687 0.005139294 0.0074335520922891698 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -5.265687 0.005139294 0.0074335520922891698 0 293 1 4.736758 0.991309166 0.012593024732102163 1 296 0 1.75365162 0.85241276 2.760360102190484 1 -297 0 +297 0 ? ? ? 0 299 1 5.53140068 0.9960552 0.0057024187918531082 1 300 1 6.04150867 0.9976277 0.0034266068641119179 1 301 0 -5.265687 0.005139294 0.0074335520922891698 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 4.025776 0.9824635 0.025524315591638584 1 317 1 8.423935 0.9997805 0.00031674081892699212 1 319 0 1.82892418 0.861633539 2.8534338101579171 1 -321 0 +321 0 ? ? ? 0 323 1 4.504982 0.9890671 0.015859728309184334 1 327 0 -5.49946833 0.00407229364 0.0058870729354622087 0 328 1 4.454892 0.9885119 0.016669732083571809 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 13.2412815 0.9999982 2.5797420694119618E-06 1 613 0 -4.637956 0.009584702 0.013894495676017315 0 614 0 -5.333713 0.00480293762 0.0069458679549985972 0 -617 0 +617 0 ? ? ? 
0 618 0 -4.623119 0.009726578 0.014101176192227349 0 619 0 -4.331002 0.0129835671 0.018853990456118342 0 621 0 -0.969069 0.2750661 0.46407865469425469 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -4.77261448 0.00838729553 0.012151339639068351 0 19 0 -4.05630064 0.01701831 0.02476355221691592 0 22 0 -5.26293468 0.00515338546 0.0074539868149001397 0 -23 1 +23 1 ? ? ? 0 24 0 -5.847086 0.00287998538 0.0041609352152631573 0 26 0 -5.01217842 0.00661237258 0.0095713166824383505 0 27 0 -4.54662037 0.0104917344 0.015216336343473301 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -5.25242233 0.005207562 0.0075325538981779582 0 135 0 -3.511651 0.028982535 0.042430850287558594 0 136 0 -4.90477753 0.0073565715 0.010652520453755514 0 -139 0 +139 0 ? ? ? 0 140 0 -5.39509726 0.00451827142 0.0065332584338255633 0 142 1 2.2592926 0.9054491 0.14329456494743992 1 143 0 -4.372905 0.0124574006 0.018085112102952432 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -4.16768646 0.01525183 0.022173264390610525 0 155 1 2.62865639 0.9326832 0.10054091831709649 1 157 0 -5.488929 0.004115263 0.0059493194428234175 0 -158 0 +158 0 ? ? ? 0 159 1 8.939568 0.9998689 0.00018910731810054123 1 160 1 6.86695 0.9989594 0.0015020181784367139 1 162 0 -5.13077164 0.00587725034 0.0085040948177142958 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 5.89666176 0.9972589 0.0039599989302112676 1 232 0 -0.179828167 0.455163717 0.87610531376236689 0 234 0 -3.15223122 0.04100345 0.060402470875188088 0 -235 0 +235 0 ? ? ? 
0 236 1 7.74873161 0.9995689 0.00062210936850165717 1 238 1 9.156269 0.99989444 0.00015229867579129541 1 243 0 -3.844254 0.0209539 0.030551300678084092 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 10.8717079 0.999981 2.7431493603031304E-05 1 287 0 -5.25242233 0.005207562 0.0075325538981779582 0 289 1 6.0516243 0.9976515 0.0033921289521523556 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 4.83148575 0.992088437 0.011459363337312383 1 298 0 -3.31150723 0.0351785272 0.051666079123835182 0 302 1 10.6722527 0.9999768 3.3451013395372324E-05 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -5.847086 0.00287998538 0.0041609352152631573 0 310 0 -5.61057949 0.00364560937 0.005269112963217676 0 313 0 -6.111411 0.00221251347 0.0031955185815049345 0 -315 0 +315 0 ? ? ? 0 318 0 -5.81555 0.002971982 0.0042940476081927553 0 320 1 3.90395832 0.98023653 0.028798182473169599 1 322 0 -5.13077164 0.00587725034 0.0085040948177142958 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -5.97924852 0.002524339 0.0036464555896367997 0 408 0 -4.06157875 0.01693024 0.024634297974768089 0 410 0 -5.97924852 0.002524339 0.0036464555896367997 0 -411 0 +411 0 ? ? ? 
0 412 1 6.142968 0.9978561 0.003096342326011223 1 417 0 -5.97924852 0.002524339 0.0036464555896367997 0 420 0 -3.33492 0.0343924649 0.050491160716171948 0 diff --git a/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-L1-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-L1-TrainTest-breast-cancer.txt index 22409b2a5a..a0b287a08e 100644 --- a/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-L1-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-L1-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 4.078068 0.983342052 0.024234755458090462 1 21 1 5.07943726 0.993815064 0.0089506841986991444 1 22 0 -6.401946 0.00165558141 0.0023904784483303873 0 -23 1 +23 1 ? ? ? 0 24 0 -6.95376 0.000954126066 0.0013771700453364219 0 25 1 0.131910324 0.532929838 0.90798248584253316 1 26 0 -6.20803738 0.00200913986 0.0029014918436988269 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -3.97515774 0.018430287 0.026837360561959296 0 38 1 3.85910988 0.9793487 0.030105439588229164 1 39 1 0.289131165 0.5717834 0.806459299491597 1 -40 0 +40 0 ? ? ? 0 41 1 0.5163593 0.626296043 0.67508333009285992 1 42 1 6.25112057 0.9980754 0.0027792492014372186 1 43 1 -1.67306614 0.158015817 2.6618591152960773 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -6.01794958 0.00242874329 0.0035081978824050039 0 137 0 -6.618124 0.00133415242 0.0019260601909807822 0 138 0 -5.72209167 0.0032621813 0.0047140259759722502 0 -139 0 +139 0 ? ? ? 0 140 0 -6.618124 0.00133415242 0.0019260601909807822 0 141 0 -7.00212 0.00090912357 0.0013121846250039875 0 142 1 1.86599445 0.8659941 0.20757090594664257 1 143 0 -5.405919 0.00446985662 0.0064630953185838089 0 144 0 -6.785942 0.00112826866 0.001628666561573529 0 -145 0 +145 0 ? ? ? 
0 146 1 -0.2344532 0.441653728 1.17901240396708 0 147 0 -6.632551 0.00131506776 0.0018984903241446914 0 148 0 -3.505578 0.0291539337 0.042685529480640867 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 2.2497673 0.9046305 0.14459948541676573 1 156 0 -6.63783455 0.0013081471 0.0018884928122825894 0 157 0 -6.569764 0.00140016491 0.0020214264686188826 0 -158 0 +158 0 ? ? ? 0 159 1 9.262019 0.99990505 0.00013699068516156971 1 160 1 6.81782627 0.9989071 0.001577599187795759 1 161 0 -5.57644749 0.00377171184 0.005451717595353108 0 162 0 -6.18576765 0.002054292 0.0029667652410511319 0 163 0 -5.441502 0.00431427639 0.0062376503223464629 0 -164 0 +164 0 ? ? ? 0 165 0 -5.081398 0.00617287867 0.0089331816190877534 0 166 1 5.99673557 0.9975193 0.0035833193575627669 1 167 1 4.04529858 0.982796669 0.025035126906624509 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 -0.9520569 0.278471351 0.47087141475726779 0 233 1 4.02608871 0.982468843 0.025516438252352936 1 234 0 -4.248459 0.0140850125 0.020464842181276369 0 -235 0 +235 0 ? ? ? 0 236 1 8.120999 0.9997029 0.00042873045458892818 1 237 1 3.97880173 0.9816355 0.02674065507190734 1 238 1 9.321572 0.999910533 0.00012907875367543245 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 9.005473 0.9998773 0.00017706700464490148 1 247 1 1.74156952 0.8508863 0.23296175567699981 1 248 0 -4.675151 0.009237982 0.013389533098992946 0 -249 0 +249 0 ? ? ? 0 250 0 -6.85401249 0.00105410081 0.0015215480827833656 0 251 1 5.57425 0.996220052 0.0054636445480688526 1 252 0 2.525447 0.9259066 3.7545111242363967 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 1.04092026 0.73902756 0.43629992864352879 1 273 1 -1.54782867 0.1754001 2.5112785805368674 0 274 0 -5.9604435 0.002572135 0.0037155870547972859 0 -275 0 +275 0 ? ? ? 
0 276 0 -6.401946 0.00165558141 0.0023904784483303873 0 277 0 -7.16993856 0.0007687785 0.0011095394740419171 0 278 0 -6.95376 0.000954126066 0.0013771700453364219 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 5.963811 0.997436464 0.0037031495249571483 1 290 0 -7.386117 0.0006194141 0.0008939025092919693 0 291 0 -6.95376 0.000954126066 0.0013771700453364219 0 -292 1 +292 1 ? ? ? 0 293 1 3.82977867 0.97874707 0.030992011604895862 1 -294 0 +294 0 ? ? ? 0 295 1 4.92571831 0.992794752 0.010432605435470237 1 296 0 -0.00817585 0.497956038 0.9941143927175089 0 -297 0 +297 0 ? ? ? 0 298 0 -4.33488274 0.0129339322 0.018781442391253584 0 299 1 4.3436203 0.987177134 0.018619117824464626 1 300 1 4.40119934 0.987885952 0.017583597598896144 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.9174366 0.7145195 0.48495470886222203 1 313 0 -7.386117 0.0006194141 0.0008939025092919693 0 314 0 -7.21227646 0.0007369329 0.001063561390310713 0 -315 0 +315 0 ? ? ? 0 316 1 2.16819382 0.897356749 0.15624644520733796 1 317 1 6.37115574 0.998292744 0.0024651544582865699 1 318 0 -6.826747 0.00108320534 0.0015635819620021878 0 319 0 -0.115743637 0.471096337 0.91892312744499838 0 320 1 4.11173058 0.9838846 0.023439022974424555 1 -321 0 +321 0 ? ? ? 0 322 0 -6.18576765 0.002054292 0.0029667652410511319 0 323 1 3.31629562 0.964983642 0.051423608112212277 1 324 0 -6.95376 0.000954126066 0.0013771700453364219 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -5.098096 0.00607128069 0.0087857038468610767 0 409 0 -6.106088 0.00222429563 0.0032125544404639546 0 410 0 -7.16993856 0.0007687785 0.0011095394740419171 0 -411 0 +411 0 ? ? ? 
0 412 1 6.053849 0.9976567 0.0033846301153950954 1 413 0 -5.12191772 0.00592920836 0.0085794995279385791 0 414 1 4.639694 0.9904318 0.013870479681066349 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -6.82828045 0.00108154735 0.0015611874005433336 0 615 0 -5.42623472 0.00438035838 0.0063334027525860032 0 616 0 -6.401946 0.00165558141 0.0023904784483303873 0 -617 0 +617 0 ? ? ? 0 618 0 -6.01794958 0.00242874329 0.0035081978824050039 0 619 0 -5.633953 0.00356168649 0.0051475999586587254 0 620 0 -6.401946 0.00165558141 0.0023904784483303873 0 diff --git a/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-SmoothedHinge-CV-breast-cancer.txt b/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-SmoothedHinge-CV-breast-cancer.txt index 5197b73592..ffa3b3b912 100644 --- a/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-SmoothedHinge-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-SmoothedHinge-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 2.322781 0.998222351 0.0025668874786068312 1 35 0 -1.91525483 0.005915621 0.0085597800780180429 0 37 0 -0.682821751 0.142692313 0.22211501491184898 0 -40 0 +40 0 ? ? ? 0 41 1 0.809957266 0.9039319 0.14571398208584879 1 44 1 2.73944569 0.999422848 0.00083289438051421241 1 45 0 -1.94671428 0.0054360223 0.0078639160441893435 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -1.53363073 0.0164193157 0.023884691545080352 0 141 0 -2.06519866 0.00395226665 0.0057132130161921821 0 144 0 -1.91525483 0.005915621 0.0085597800780180429 0 -145 0 +145 0 ? ? ? 
0 147 0 -1.84313476 0.007179688 0.010395463465996112 0 150 0 -1.97708547 0.005009752 0.0072457094989476683 0 151 1 1.45099306 0.9815549 0.026859095208834581 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -2.1974268 0.00276787742 0.0039987395951823247 0 156 0 -1.89696681 0.006213517 0.0089921755974289187 0 161 0 -1.42502642 0.0218984671 0.031943861389482818 0 -164 0 +164 0 ? ? ? 0 167 1 2.354064 0.998366237 0.0023589497379793915 1 169 0 -2.21602869 0.00263251015 0.0038029171463890205 0 171 0 -1.897539 0.006203974 0.008978322239894131 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 3.30174255 0.9998737 0.00018222712667875736 1 247 1 0.629675865 0.8525092 0.23021269060466767 1 248 0 -1.12171221 0.0483664 0.07152188601007381 0 -249 0 +249 0 ? ? ? 0 250 0 -2.04691076 0.00415170472 0.0060021119500057712 0 252 0 1.01602411 0.942604 4.12290612676055 1 254 1 2.17657 0.997363031 0.0038093670302838936 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -1.897539 0.006203974 0.008978322239894131 0 271 0 -1.368627 0.0254129283 0.037137010195002478 0 272 1 0.765766859 0.8930476 0.16319106749073262 1 -275 0 +275 0 ? ? ? 0 276 0 -1.78302681 0.008435549 0.012221545522817861 0 277 0 -2.047483 0.00414531538 0.0059928556832883097 0 278 0 -1.897539 0.006203974 0.008978322239894131 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -1.897539 0.006203974 0.008978322239894131 0 293 1 1.17371225 0.961760044 0.056251103565820429 1 296 0 0.4346769 0.773356 2.1415002561411738 1 -297 0 +297 0 ? ? ? 
0 299 1 2.019999 0.9959794 0.0058121507954068162 1 300 1 2.2421217 0.9977903 0.0031914838554231535 1 301 0 -1.897539 0.006203974 0.008978322239894131 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 0.9578371 0.9334789 0.099310694790810672 1 317 1 2.6748023 0.999312758 0.00099182083151890563 1 319 0 0.702352047 0.87553966 3.0062420076587428 1 -321 0 +321 0 ? ? ? 0 323 1 1.138917 0.9581469 0.061681187824867326 1 327 0 -2.047483 0.00414531538 0.0059928556832883097 0 328 1 0.998517036 0.9399896 0.089283259387362562 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 5.011583 0.999998748 1.8058189642293044E-06 1 613 0 -1.848559 0.00707593327 0.010244702155254935 0 614 0 -1.9948014 0.0047766366 0.006907740990800825 0 -617 0 +617 0 ? ? ? 0 618 0 -1.6507988 0.0120159388 0.017440327322179375 0 619 0 -1.51857078 0.017089799 0.024868477463820319 0 621 0 -0.159297943 0.406589121 0.75289671799437707 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -2.01459932 0.02046752 0.029834763390847389 0 19 0 -1.722561 0.0706015 0.10563077935630806 0 22 0 -2.18885 0.009580599 0.013888519789490289 0 -23 1 +23 1 ? ? ? 0 24 0 -2.45265675 0.00300532184 0.0043422911736604322 0 26 0 -2.15108967 0.0113010341 0.016396771634848997 0 27 0 -1.8968116 0.0339725837 0.049863960916695586 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -2.18885 0.009580599 0.013888519789490289 0 135 0 -1.67432666 0.0859358 0.12963259925365803 0 136 0 -2.04283071 0.0181100331 0.026366733420336564 0 -139 0 +139 0 ? ? ? 
0 140 0 -2.21708131 0.00846625 0.012266215273071623 0 142 1 -0.39607358 0.963921666 0.053012185419208942 0 143 0 -2.12454414 0.0126899984 0.018424952871546458 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -1.87877429 0.03668837 0.053925509586129468 0 155 1 -0.6914537 0.878659666 0.18662362559522522 0 157 0 -2.30663776 0.005714676 0.0082681821048535484 0 -158 0 +158 0 ? ? ? 0 159 1 0.826603651 0.9998317 0.00024285994311160547 1 160 1 0.465637445 0.999170542 0.0011971518470987805 1 162 0 -2.16061854 0.0108400183 0.015724220850716938 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 -0.0580048561 0.9916699 0.012068138193505744 0 232 0 -1.12658465 0.5141482 1.0414117089703063 0 234 0 -1.83982372 0.04328234 0.063834867527523209 0 -235 0 +235 0 ? ? ? 0 236 1 0.979896069 0.999914467 0.00012340282955575714 1 238 1 0.930060863 0.9998934 0.00015376068339360816 1 243 0 -2.01161838 0.02073334 0.030226326896041298 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 1.41079044 0.999987245 1.8402261006614678E-05 1 287 0 -2.18885 0.009580599 0.013888519789490289 0 289 1 0.0753440857 0.9953623 0.0067063762389963991 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 -0.367047548 0.9681271 0.046731673516829733 0 298 0 -1.44922507 0.202717274 0.32683668378040626 0 302 1 1.39901066 0.9999866 1.9348177961999343E-05 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -2.45265675 0.00300532184 0.0043422911736604322 0 310 0 -2.33486915 0.00504769245 0.0073007223089671565 0 313 0 -2.50911975 0.002343144 0.0033844089083832525 0 -315 0 +315 0 ? ? ? 
0 318 0 -2.45265675 0.00300532184 0.0043422911736604322 0 320 1 0.09479761 0.995742738 0.006155041546569217 1 322 0 -2.16061854 0.0108400183 0.015724220850716938 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -2.48088837 0.00265371148 0.0038335852528692412 0 408 0 -2.10281515 0.0139513118 0.020269210492385212 0 410 0 -2.48088837 0.00265371148 0.0038335852528692412 0 -411 0 +411 0 ? ? ? 0 412 1 -0.0120434761 0.9931908 0.0098571608082237746 0 417 0 -2.48088837 0.00265371148 0.0038335852528692412 0 420 0 -1.86625433 0.0386949927 0.056933846240527428 0 diff --git a/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-SmoothedHinge-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-SmoothedHinge-TrainTest-breast-cancer.txt index 1c905b24c8..b2ca41a554 100644 --- a/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-SmoothedHinge-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-SmoothedHinge-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 1.51313376 0.997060657 0.0042468207167790329 1 21 1 1.88215756 0.9991858 0.0011751199613487565 1 22 0 -1.5913856 0.006754517 0.0097777676588594702 0 -23 1 +23 1 ? ? ? 0 24 0 -1.85515833 0.00270523666 0.0039081201011675363 0 25 1 0.183981657 0.767693 0.38139863321618261 1 26 0 -1.596403 0.006638234 0.0096088756803218642 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -0.48321867 0.244259775 0.40403768061841711 0 38 1 1.01125383 0.983338952 0.024239302762828055 1 39 1 -0.0106446743 0.62648654 0.67464458153125895 0 -40 0 +40 0 ? ? ? 
0 41 1 0.8607874 0.9721748 0.040712322202366577 1 42 1 1.4849956 0.9967589 0.004683544378464086 1 43 1 -0.7279444 0.121085733 3.0458992041721875 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -1.404305 0.0128827207 0.018706593545987932 0 137 0 -1.701774 0.00460771844 0.0066628947272309403 0 138 0 -1.360993 0.0149499336 0.021731041715098105 0 -139 0 +139 0 ? ? ? 0 140 0 -1.701774 0.00460771844 0.0066628947272309403 0 141 0 -1.8888545 0.00240627048 0.0034756978965475691 0 142 1 0.881847143 0.974092543 0.037869253621396266 1 143 0 -1.545697 0.007910921 0.01145843020471164 0 144 0 -1.77846622 0.00353102176 0.0051032026604077695 0 -145 0 +145 0 ? ? ? 0 146 1 -0.216273546 0.450334132 1.1509322683430081 0 147 0 -1.82796979 0.00297325244 0.0042958860993499966 0 148 0 -0.7305672 0.12011648 0.18461554363807445 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 0.429970026 0.886195242 0.17430351324204957 1 156 0 -1.92223477 0.0021426254 0.0030944713637373776 0 157 0 -1.66807771 0.005178785 0.0074908208279084272 0 -158 0 +158 0 ? ? ? 0 159 1 2.79265332 0.999965847 4.9273870913609948E-05 1 160 1 2.08471417 0.999597847 0.00058030016510027748 1 161 0 -1.23200977 0.0232356917 0.033917610773834123 0 162 0 -1.48099709 0.009891603 0.014341615100451873 0 163 0 -1.20272636 0.02566766 0.037514142314967996 0 -164 0 +164 0 ? ? ? 0 165 0 -1.09945023 0.0363804549 0.053464438820388052 0 166 1 1.35837817 0.99497056 0.0072742561293771086 1 167 1 1.40636182 0.9957416 0.0061566823680882154 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 -0.0236132145 0.6158536 1.3802718900263455 0 233 1 1.45269513 0.9963742 0.005240444497289571 1 234 0 -1.12313449 0.0335955 0.049300920379201259 0 -235 0 +235 0 ? ? ? 
0 236 1 2.801736 0.9999669 4.7725975020197848E-05 1 237 1 1.18725443 0.9909072 0.013178153095375312 1 238 1 2.58363461 0.999929249 0.00010207531428030829 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 2.15890861 0.9996894 0.00044817040124598361 1 247 1 0.7686317 0.9620387 0.055833170607592193 1 248 0 -0.9843831 0.0533667132 0.079122441964183776 0 -249 0 +249 0 ? ? ? 0 250 0 -2.03262329 0.001459477 0.0021071182550953969 0 251 1 1.98737311 0.999435544 0.0008145677672951316 1 252 0 0.938287735 0.9786189 5.5475214513768956 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 0.364958525 0.861275733 0.21545291272160746 1 273 1 -0.0762293339 0.5716655 0.80675680440858633 0 274 0 -1.41909039 0.0122437514 0.017773027141160865 0 -275 0 +275 0 ? ? ? 0 276 0 -1.5913856 0.006754517 0.0097777676588594702 0 277 0 -1.96554685 0.00184303813 0.0026613952529717312 0 278 0 -1.85515833 0.00270523666 0.0039081201011675363 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 1.51208711 0.99705 0.0042622586236973218 1 290 0 -2.07593513 0.00125529012 0.0018121384446850468 0 291 0 -1.85515833 0.00270523666 0.0039081201011675363 0 -292 1 +292 1 ? ? ? 0 293 1 1.33046842 0.9944597 0.0080152036111407286 1 -294 0 +294 0 ? ? ? 0 295 1 1.2251215 0.9920222 0.011555664783429847 1 296 0 0.111403227 0.719591737 1.8343992308763424 1 -297 0 +297 0 ? ? ? 0 298 0 -0.7513188 0.112681925 0.17247673834356275 0 299 1 1.96303582 0.9993856 0.00088667099785278705 1 300 1 1.383044 0.995382845 0.0066765713113142704 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.17001605 0.7589018 0.39801492605707506 1 313 0 -2.07593513 0.00125529012 0.0018121384446850468 0 314 0 -1.97056413 0.00181115558 0.0026153142649758387 0 -315 0 +315 0 ? ? ? 
0 316 1 0.541802645 0.9199835 0.12032009864475471 1 317 1 1.56700492 0.9975625 0.0035208219605770688 1 318 0 -1.84010613 0.00285049225 0.0041182633675105971 0 319 0 0.05099702 0.675234735 1.6225307564202673 1 320 1 1.7057507 0.9984955 0.0021721414278146082 1 -321 0 +321 0 ? ? ? 0 322 0 -1.48099709 0.009891603 0.014341615100451873 0 323 1 0.7110567 0.95399344 0.067948749601842412 1 324 0 -1.85515833 0.00270523666 0.0039081201011675363 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -1.45910537 0.0106672905 0.015472318523715401 0 409 0 -1.54807365 0.007846191 0.011364302031181555 0 410 0 -1.96554685 0.00184303813 0.0026613952529717312 0 -411 0 +411 0 ? ? ? 0 412 1 1.61123109 0.9979099 0.0030185274243607983 1 413 0 -1.063524 0.0410328656 0.060446722723610943 0 414 1 1.54281878 0.9973487 0.0038300596622238277 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -1.78348351 0.00347004039 0.0050149160869795298 0 615 0 -1.31768119 0.0173430257 0.025240206070453266 0 616 0 -1.5913856 0.006754517 0.0097777676588594702 0 -617 0 +617 0 ? ? ? 0 618 0 -1.404305 0.0128827207 0.018706593545987932 0 619 0 -1.21722436 0.024434112 0.035688781701501276 0 620 0 -1.5913856 0.006754517 0.0097777676588594702 0 diff --git a/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-TrainTest-breast-cancer.txt index ff963fe7a2..9b8d818944 100644 --- a/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/SDCA/BinarySDCA-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 1.37743354 0.7985785 0.32449386248842338 1 21 1 1.7630446 0.853590548 0.22838389385799779 1 22 0 -1.874893 0.132976577 0.20585712538187464 0 -23 1 +23 1 ? ? ? 
0 24 0 -1.948468 0.124720506 0.19218432401279964 0 25 1 0.243255854 0.5605158 0.83517300013076445 1 26 0 -1.79869926 0.142009482 0.22096639052344227 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -1.30918431 0.212623373 0.34487420705553562 0 38 1 1.52097869 0.8206825 0.28510385981645775 1 39 1 0.568297148 0.638370156 0.64753488753370236 1 -40 0 +40 0 ? ? ? 0 41 1 0.08091617 0.520218 0.94281173541226504 1 42 1 2.4137857 0.9178725 0.12363434755323521 1 43 1 -0.08661461 0.478359878 1.0638317048217665 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -1.80893993 0.1407663 0.21887750444885776 0 137 0 -1.9332242 0.126394138 0.1949455575113978 0 138 0 -1.70428443 0.153906524 0.24111103479242962 0 -139 0 +139 0 ? ? ? 0 140 0 -1.9332242 0.126394138 0.1949455575113978 0 141 0 -1.99917722 0.119289339 0.18325996439997919 0 142 1 0.6176696 0.649688363 0.62218023037407078 1 143 0 -1.44733071 0.190412715 0.3047414631100317 0 144 0 -1.94084609 0.125554934 0.19356034047915699 0 -145 0 +145 0 ? ? ? 0 146 1 0.272925377 0.567810953 0.81651741701068026 1 147 0 -1.8198235 0.13945505 0.21667754281184065 0 148 0 -0.713562 0.328812242 0.57521169167023078 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 0.9670353 0.7245282 0.46488626460347809 1 156 0 -1.793424 0.142653435 0.22204959316054371 0 157 0 -1.882515 0.132100269 0.20439971838260004 0 -158 0 +158 0 ? ? ? 0 159 1 3.20693851 0.961094558 0.057249715926430703 1 160 1 2.3220613 0.910687745 0.13497162539860094 1 161 0 -1.65904641 0.159890041 0.25134992500211428 0 162 0 -1.81656182 0.139846936 0.21733468482291701 0 163 0 -1.60762358 0.166918814 0.26347099825296155 0 -164 0 +164 0 ? ? ? 
0 165 0 -1.50459647 0.181740969 0.28937047546414252 0 166 1 2.457339 0.9210965 0.11857577878171396 1 167 1 1.31327176 0.7880601 0.34362238392184646 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 -0.213132143 0.446917742 0.85443403273590568 0 233 1 1.23634028 0.774926364 0.36786886673349695 1 234 0 -1.00412917 0.268130362 0.4503413991221501 0 -235 0 +235 0 ? ? ? 0 236 1 2.65734458 0.934462249 0.097791712220334726 1 237 1 1.59322524 0.8310694 0.26695912082718543 1 238 1 3.30821967 0.9647097 0.051833223594515407 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 3.32132936 0.965153337 0.051169929428424521 1 247 1 0.572052956 0.639236748 0.64557774708968529 1 248 0 -1.29279661 0.215379834 0.34993367973529765 0 -249 0 +249 0 ? ? ? 0 250 0 -1.85175514 0.135666952 0.21034077001927648 0 251 1 1.686516 0.843765438 0.24508610192596253 1 252 0 0.9327009 0.717622936 1.8243051828654713 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 0.5505221 0.6342567 0.65686119381445462 1 273 1 -0.451063156 0.389108032 1.3617573340834921 0 274 0 -1.72499943 0.151228324 0.23655158068052121 0 -275 0 +275 0 ? ? ? 0 276 0 -1.874893 0.132976577 0.20585712538187464 0 277 0 -2.00679922 0.1184909 0.18195262731195211 0 278 0 -1.948468 0.124720506 0.19218432401279964 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 2.363632 0.914011657 0.12971552945936157 1 290 0 -2.06513023 0.112532459 0.17223374110934919 0 291 0 -1.948468 0.124720506 0.19218432401279964 0 -292 1 +292 1 ? ? ? 0 293 1 1.17569327 0.764172554 0.38802965202562639 1 -294 0 +294 0 ? ? ? 0 295 1 1.9321816 0.8734907 0.1951357665052601 1 296 0 0.2875533 0.571397066 1.222286369266439 1 -297 0 +297 0 ? ? ? 
0 298 0 -1.39716244 0.198266774 0.31880583096618414 0 299 1 1.31489539 0.788331151 0.34312631094095036 1 300 1 1.6675427 0.8412479 0.24939706872810494 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.6733315 0.662248731 0.59455492152164024 1 313 0 -2.06513023 0.112532459 0.17223374110934919 0 314 0 -2.03932476 0.115135506 0.17647155325903272 0 -315 0 +315 0 ? ? ? 0 316 1 0.9730499 0.725726962 0.46250122508222125 1 317 1 2.53153682 0.9263233 0.11041230078378952 1 318 0 -1.85089087 0.135768339 0.21051001013383977 0 319 0 0.279238 0.5693594 1.2154438271457824 1 320 1 1.35926127 0.795639634 0.32981295026166818 1 -321 0 +321 0 ? ? ? 0 322 0 -1.81656182 0.139846936 0.21733468482291701 0 323 1 1.28926969 0.7840235 0.35103115421599534 1 324 0 -1.948468 0.124720506 0.19218432401279964 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -1.33084726 0.209019259 0.33828552617088203 0 409 0 -1.77023745 0.1455128 0.22686922143757757 0 410 0 -2.00679922 0.1184909 0.18195262731195211 0 -411 0 +411 0 ? ? ? 0 412 1 2.25751042 0.9052964 0.14353790094246915 1 413 0 -1.58000016 0.170795456 0.27020007178125394 0 414 1 1.60186315 0.8322786 0.2648615365498318 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -1.97337174 0.122027189 0.18775183095640005 0 615 0 -1.59962881 0.1680335 0.26540264924934215 0 616 0 -1.874893 0.132976577 0.20585712538187464 0 -617 0 +617 0 ? ? ? 
0 618 0 -1.80893993 0.1407663 0.21887750444885776 0 619 0 -1.74298692 0.148933932 0.23265696296547442 0 620 0 -1.874893 0.132976577 0.20585712538187464 0 diff --git a/test/BaselineOutput/SingleRelease/SGD/BinarySGD-CV-breast-cancer.txt b/test/BaselineOutput/SingleRelease/SGD/BinarySGD-CV-breast-cancer.txt index 8b3d215017..bee6e20c10 100644 --- a/test/BaselineOutput/SingleRelease/SGD/BinarySGD-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/SGD/BinarySGD-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 0.344691515 0.5853297 0.77267858291118818 1 35 0 -0.6512495 0.342708021 0.60539371596619707 0 37 0 -0.503962 0.376610041 0.68179317658398975 0 -40 0 +40 0 ? ? ? 0 41 1 -0.09442055 0.4764124 1.069717174215209 0 44 1 0.5798913 0.6410424 0.64150828626867074 1 45 0 -0.6560674 0.341623574 0.60301541680763648 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -0.597589 0.354895473 0.63239515287919279 0 141 0 -0.6678773 0.3389723 0.5972173671855312 0 144 0 -0.6512495 0.342708021 0.60539371596619707 0 -145 0 +145 0 ? ? ? 0 147 0 -0.6018609 0.353918076 0.63021098184538349 0 150 0 -0.656989932 0.3414161 0.60256083089734203 0 151 1 0.160812974 0.540116847 0.88865654739065503 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -0.67789495 0.336731285 0.59233461600767046 0 156 0 -0.5912131 0.356356561 0.63566639957530646 0 161 0 -0.5840918 0.3579916 0.63933593475596628 0 -164 0 +164 0 ? ? ? 0 167 1 0.315922141 0.5783301 0.79003490476906479 1 169 0 -0.6661818 0.33935234 0.59804704249396301 0 171 0 -0.644639254 0.344198585 0.60866907932442826 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 0.9649427 0.7241103 0.46571861194179465 1 247 1 0.07307994 0.51826185 0.94824689644610127 1 248 0 -0.4621131 0.386484653 0.70482866116462817 0 -249 0 +249 0 ? ? ? 
0 250 0 -0.607840955 0.352551848 0.62716342858897578 0 252 0 0.210040927 0.552318037 1.1594538998856256 1 254 1 0.5551709 0.635334432 0.65441188637353787 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -0.644639254 0.344198585 0.60866907932442826 0 271 0 -0.6045689 0.3532991 0.62882950155653372 0 272 1 0.109180689 0.5272681 0.92339134731632821 1 -275 0 +275 0 ? ? ? 0 276 0 -0.6412319 0.3449681 0.61036295013192154 0 277 0 -0.6612671 0.340455025 0.60045705425766605 0 278 0 -0.644639254 0.344198585 0.60866907932442826 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -0.644639254 0.344198585 0.60866907932442826 0 293 1 0.2438618 0.560665131 0.83478874766701627 1 296 0 0.0298114419 0.5074523 1.0216646747888918 1 -297 0 +297 0 ? ? ? 0 299 1 0.29232645 0.5725656 0.80448706147450022 1 300 1 0.440484524 0.6083745 0.71696846696669991 1 301 0 -0.644639254 0.344198585 0.60866907932442826 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 0.23788023 0.5591912 0.83858636815416177 1 317 1 0.736536 0.676237941 0.56439713300164063 1 319 0 0.032892406 0.508222342 1.0239219003304794 1 -321 0 +321 0 ? ? ? 0 323 1 0.3295461 0.581648946 0.78177941761076775 1 327 0 -0.6612671 0.340455025 0.60045705425766605 0 328 1 0.309677362 0.5768065 0.79384070845234189 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 1.18309152 0.7655032 0.38551963408922602 1 613 0 -0.578392267 0.3593026 0.64228498181768678 0 614 0 -0.663600147 0.339931339 0.59931199209014285 0 -617 0 +617 0 ? ? ? 0 618 0 -0.6312144 0.347235233 0.61536490408676614 0 619 0 -0.621196747 0.349509329 0.62039972730149728 0 621 0 -0.2970261 0.426284641 0.80159295389429652 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -0.5587295 0.363841474 0.65254177576954708 0 19 0 -0.521061659 0.372604 0.67255179238253271 0 22 0 -0.597002566 0.355029762 0.63269550517202178 0 -23 1 +23 1 ? ? ? 
0 24 0 -0.6152313 0.3508668 0.62341353797116505 0 26 0 -0.568058968 0.361684829 0.64765915777643768 0 27 0 -0.559334755 0.3637014 0.6522241546694586 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -0.586384356 0.35746488 0.6381527823333405 0 135 0 -0.4233432 0.395717025 0.72670379813371644 0 136 0 -0.578168631 0.3593541 0.6424009484072789 0 -139 0 +139 0 ? ? ? 0 140 0 -0.6164417 0.350591153 0.62280105585760936 0 142 1 0.253997982 0.5631603 0.82838245949919198 1 143 0 -0.455442041 0.388067663 0.70855595514156899 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -0.498274028 0.377946377 0.68488914380895782 0 155 1 0.381782949 0.5943031 0.75072925773026189 1 157 0 -0.5963973 0.355168372 0.63300558819075581 0 -158 0 +158 0 ? ? ? 0 159 1 1.14907432 0.7593418 0.3971787124185161 1 160 1 0.835970342 0.697615862 0.51949525142708575 1 162 0 -0.5775634 0.359493434 0.64271473576785443 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 0.867232 0.7041694 0.50600557448041072 1 232 0 -0.0153304338 0.496167481 0.98898385339386186 0 234 0 -0.28234 0.4298802 0.81066299300396294 0 -235 0 +235 0 ? ? ? 0 236 1 0.9360302 0.718297064 0.47734747567952907 1 238 1 1.21385264 0.770979941 0.37523476983545384 1 243 0 -0.3776855 0.406685263 0.7531304771823496 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 1.42419529 0.8059953 0.31115669483110281 1 287 0 -0.586384356 0.35746488 0.6381527823333405 0 289 1 0.8870376 0.7082785 0.49761139182067243 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 0.7293541 0.674663544 0.56775988879752004 1 298 0 -0.4508557 0.389157325 0.711127239010238 0 302 1 1.38025045 0.7990312 0.3236762608744207 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -0.6152313 0.3508668 0.62341353797116505 0 310 0 -0.6052183 0.353150755 0.62849857907527806 0 313 0 -0.654109538 0.342064053 0.60398095649539585 0 -315 0 +315 0 ? ? ? 
0 318 0 -0.5833766 0.358156025 0.63970545866432549 0 320 1 0.483023465 0.618461549 0.69324419149565364 1 322 0 -0.5775634 0.359493434 0.64271473576785443 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -0.6346704 0.3464523 0.61363554879852178 0 408 0 -0.398791075 0.401602834 0.74082475334425091 0 410 0 -0.6346704 0.3464523 0.61363554879852178 0 -411 0 +411 0 ? ? ? 0 412 1 0.851157367 0.7008099 0.51290494780474771 1 417 0 -0.6346704 0.3464523 0.61363554879852178 0 420 0 -0.3393041 0.415978521 0.77590666598714197 0 diff --git a/test/BaselineOutput/SingleRelease/SGD/BinarySGD-Hinge-CV-breast-cancer.txt b/test/BaselineOutput/SingleRelease/SGD/BinarySGD-Hinge-CV-breast-cancer.txt index a2364522f6..ca9f631c71 100644 --- a/test/BaselineOutput/SingleRelease/SGD/BinarySGD-Hinge-CV-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/SGD/BinarySGD-Hinge-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 0.6522908 0.9885379 0.01663180464505477 1 35 0 -1.12420082 0.0107460516 0.015587176400536187 0 37 0 -0.865502357 0.0386134759 0.056811513682664551 0 -40 0 +40 0 ? ? ? 0 41 1 -0.1372056 0.6145622 0.70236902985400396 0 44 1 1.0547893 0.9984863 0.0021854902279334833 1 45 0 -1.13070083 0.0104023358 0.015085999817509936 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -1.03150129 0.0170594156 0.024823881934527593 0 141 0 -1.15380061 0.00926659 0.0134311903289673 0 144 0 -1.12420082 0.0107460516 0.015587176400536187 0 -145 0 +145 0 ? ? ? 0 147 0 -1.03270125 0.0169580057 0.024675046951938132 0 150 0 -1.13440084 0.0102115627 0.0148079061901775 0 151 1 0.335192084 0.945543051 0.080784949020458824 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -1.17080045 0.008510069 0.012329973523626827 0 156 0 -1.01700115 0.0183329377 0.026694285275120175 0 161 0 -1.00710154 0.0192555338 0.028050804915513337 0 -164 0 +164 0 ? ? ? 
0 167 1 0.6017914 0.98525393 0.021432495728287648 1 169 0 -1.15050054 0.009420992 0.013656046492446974 0 171 0 -1.11160088 0.01144463 0.016606319988959024 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 1.7594862 0.999956965 6.208707291578748E-05 1 247 1 0.172593474 0.8841638 0.17761443218515088 1 248 0 -0.793102443 0.0547425 0.081220704071742325 0 -249 0 +249 0 ? ? ? 0 250 0 -1.04660094 0.0158256628 0.0230141968119793 0 252 0 0.423291922 0.964417338 4.8126817596940885 1 254 1 1.03148985 0.998297453 0.0024583495418655562 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -1.11160088 0.01144463 0.016606319988959024 0 271 0 -1.04360127 0.01606356 0.023362970756245272 0 272 1 0.243792772 0.9162424 0.12619873583154692 1 -275 0 +275 0 ? ? ? 0 276 0 -1.10720086 0.0116990069 0.016977604564195337 0 277 0 -1.14120078 0.009869945 0.014310057532988397 0 278 0 -1.11160088 0.01144463 0.016606319988959024 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -1.11160088 0.01144463 0.016606319988959024 0 293 1 0.478191853 0.97280544 0.039776797794310127 1 296 0 0.0886935 0.83318603 2.5836879844847194 1 -297 0 +297 0 ? ? ? 0 299 1 0.5653906 0.9823277 0.025723714124946184 1 300 1 0.8071904 0.994728446 0.0076253612609159995 1 301 0 -1.11160088 0.01144463 0.016606319988959024 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 0.472591877 0.9720465 0.040902773075660377 1 317 1 1.35288787 0.9996641 0.00048464251232972397 1 319 0 0.100693226 0.841446757 2.6569607115771978 1 -321 0 +321 0 ? ? ? 
0 323 1 0.6351912 0.987516046 0.018123905012295945 1 327 0 -1.14120078 0.009869945 0.014310057532988397 0 328 1 0.5871916 0.9841424 0.023060980928491851 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 2.158784 0.9999943 8.2551908597278378E-06 1 613 0 -0.993201256 0.0206282046 0.030071445887912274 0 614 0 -1.14700067 0.009587526 0.013898610301947204 0 -617 0 +617 0 ? ? ? 0 618 0 -1.090201 0.0127353743 0.018491259389232038 0 619 0 -1.07320118 0.0138622615 0.020138926273650697 0 621 0 -0.4947039 0.207426473 0.33538331559891793 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -1.02980137 0.0147205992 0.021395199721883466 0 19 0 -0.9630018 0.0192868058 0.028096807322037363 0 22 0 -1.0968008 0.011213962 0.016269722836470393 0 -23 1 +23 1 ? ? ? 0 24 0 -1.13000059 0.00979631 0.014202769680513335 0 26 0 -1.04700089 0.0137287723 0.019943648017493942 0 27 0 -1.03000128 0.0147086745 0.021377739156459911 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -1.079601 0.0120263351 0.017455508586451673 0 135 0 -0.7955023 0.03769685 0.055436642942084668 0 136 0 -1.0634011 0.0128445318 0.018650780679196597 0 -139 0 +139 0 ? ? ? 0 140 0 -1.13040054 0.009780362 0.014179534189705549 0 142 1 0.3573923 0.8180728 0.2896988683892544 1 143 0 -0.8376017 0.03189309 0.046761717990888088 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -0.917201638 0.0231931154 0.033854726268903304 0 155 1 0.5948919 0.922758937 0.1159742899072022 1 157 0 -1.09660089 0.0112230852 0.016283034217764617 0 -158 0 +158 0 ? ? ? 
0 159 1 1.88818586 0.9995909 0.00059036523296398255 1 160 1 1.36718786 0.996521652 0.0050269435856363917 1 162 0 -1.06320107 0.01285497 0.018666035936317264 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 1.39838827 0.9969394 0.0044222533640880836 1 232 0 -0.118705273 0.38808772 0.70860324232262772 0 234 0 -0.57100296 0.08979461 0.13573597071280369 0 -235 0 +235 0 ? ? ? 0 236 1 1.52918732 0.9982108 0.0025835996006501579 1 238 1 1.96808636 0.9997055 0.00042494571667690465 1 243 0 -0.703202665 0.0541654155 0.080340199841784268 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 2.32438421 0.999932 9.8119438852589428E-05 1 287 0 -1.079601 0.0120263351 0.017455508586451673 0 289 1 1.42798841 0.99728936 0.0039159372565436843 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 1.171589 0.9922556 0.011216254621805593 1 298 0 -0.8448025 0.0309909787 0.045417997957015462 0 302 1 2.26218414 0.999912143 0.00012675678201834402 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -1.13000059 0.00979631 0.014202769680513335 0 310 0 -1.11300075 0.0104985284 0.015226241971828952 0 313 0 -1.19720018 0.007447739 0.010785028389452023 0 -315 0 +315 0 ? ? ? 0 318 0 -1.07840109 0.0120851332 0.017541371482030246 0 320 1 0.77709043 0.961948335 0.055968683838555341 1 322 0 -1.06320107 0.01285497 0.018666035936317264 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -1.16360033 0.008542378 0.012376987044267063 0 408 0 -0.7606021 0.0432654768 0.063809436873055653 0 410 0 -1.16360033 0.008542378 0.012376987044267063 0 -411 0 +411 0 ? ? ? 
0 412 1 1.35098827 0.9962829 0.0053726686646499901 1 417 0 -1.16360033 0.008542378 0.012376987044267063 0 420 0 -0.6557027 0.0650944263 0.097107435980156739 0 diff --git a/test/BaselineOutput/SingleRelease/SGD/BinarySGD-Hinge-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleRelease/SGD/BinarySGD-Hinge-TrainTest-breast-cancer.txt index 9dec61f554..8153302a29 100644 --- a/test/BaselineOutput/SingleRelease/SGD/BinarySGD-Hinge-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/SGD/BinarySGD-Hinge-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 1.222367 0.9828292 0.024987354577091989 1 21 1 1.49796534 0.9935609 0.0093196803205346252 1 22 0 -1.19740391 0.009379565 0.013595712679399167 0 -23 1 +23 1 ? ? ? 0 24 0 -1.23810339 0.008112156 0.01175109505221196 0 25 1 0.3802781 0.7344174 0.4453278964846259 1 26 0 -1.14100432 0.01146578 0.016637187062001499 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -0.79460907 0.0387752019 0.057054226638771945 0 38 1 1.34886718 0.989039242 0.015900330726329494 1 39 1 0.6641748 0.8848001 0.17657658460400094 1 -40 0 +40 0 ? ? ? 0 41 1 0.223279715 0.611168444 0.7103580388075188 1 42 1 2.04505968 0.999095738 0.0013051645925104319 1 43 1 0.169680834 0.56447953 0.82500683046719547 1 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -1.15510464 0.0109047387 0.015818618982627604 0 137 0 -1.24130344 0.008020028 0.011617101215948859 0 138 0 -1.0749054 0.0144998142 0.021071951694510416 0 -139 0 +139 0 ? ? ? 0 140 0 -1.24130344 0.008020028 0.011617101215948859 0 141 0 -1.28360271 0.00689550675 0.0099825705316781733 0 142 1 0.643275 0.8769101 0.18949916409473391 1 143 0 -0.8591082 0.0309929028 0.04542086264678518 0 144 0 -1.23970342 0.008065961 0.011683906942824285 0 -145 0 +145 0 ? ? ? 
0 146 1 0.4436772 0.7764816 0.36497630408526505 1 147 0 -1.13820457 0.01158053 0.016804665017851262 0 148 0 -0.320714116 0.181648508 0.2892074628941344 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 0.9467714 0.9550218 0.066394431489110783 1 156 0 -1.11830425 0.0124295028 0.018044356997620709 0 157 0 -1.19580388 0.009433212 0.013673843835136703 0 -158 0 +158 0 ? ? ? 0 159 1 2.5887537 0.9998721 0.0001845491876153846 1 160 1 1.92586017 0.9986121 0.0020036987954509607 1 161 0 -1.04420578 0.01616599 0.023513167231551696 0 162 0 -1.15350461 0.0109670116 0.015909453114236246 0 163 0 -1.00530624 0.0185497329 0.027012930786301039 0 -164 0 +164 0 ? ? ? 0 165 0 -0.9275073 0.0243961066 0.035632579294711073 0 166 1 2.06175947 0.9991484 0.0012290814661556479 1 167 1 1.15397048 0.9781426 0.031883260676163518 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 0.0278829336 0.437610149 0.83035753638984611 1 233 1 1.11616874 0.975037336 0.036470630963644925 1 234 0 -0.544210553 0.09034561 0.13660958234622511 0 -235 0 +235 0 ? ? ? 0 236 1 2.15735817 0.999396145 0.00087144125636392123 1 237 1 1.37376881 0.9899692 0.014544463077068375 1 238 1 2.659554 0.9999008 0.00014309666195095306 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 2.70435286 0.9999156 0.00012176885553963592 1 247 1 0.616675138 0.8662006 0.207226880023576 1 248 0 -0.7762085 0.04131975 0.060878383940094143 0 -249 0 +249 0 ? ? ? 0 250 0 -1.16220379 0.0106326314 0.015421777653289784 0 251 1 1.43526506 0.9919444 0.011668877043853167 1 252 0 0.8921714 0.9457866 4.2052065773298537 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 0.6439743 0.8771814 0.18905285611569214 1 273 1 -0.149016142 0.29164663 1.7777066932129042 0 274 0 -1.08650517 0.0139153069 0.020216532376881539 0 -275 0 +275 0 ? ? ? 
0 276 0 -1.19740391 0.009379565 0.013595712679399167 0 277 0 -1.28200269 0.00693504466 0.010040008870103924 0 278 0 -1.23810339 0.008112156 0.01175109505221196 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 1.98086047 0.998861 0.0016441447437435472 1 290 0 -1.32590222 0.005927712 0.0085773281383116576 0 291 0 -1.23810339 0.008112156 0.01175109505221196 0 -292 1 +292 1 ? ? ? 0 293 1 1.05756974 0.9693573 0.044899543967322132 1 -294 0 +294 0 ? ? ? 0 295 1 1.67846322 0.9966264 0.0048753374261991068 1 296 0 0.418977976 0.760678768 2.062979700875831 1 -297 0 +297 0 ? ? ? 0 298 0 -0.865008354 0.0303616133 0.044481280497859745 0 299 1 1.15566754 0.9782728 0.031691271742118471 1 300 1 1.43996739 0.9920784 0.011473925159666753 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.7111747 0.9009488 0.15048293737780147 1 313 0 -1.32590222 0.005927712 0.0085773281383116576 0 314 0 -1.3091023 0.006294775 0.0091101437491470354 0 -315 0 +315 0 ? ? ? 0 316 1 0.9520712 0.955833852 0.065168231187009498 1 317 1 2.1167593 0.999301255 0.0010084286664473847 1 318 0 -1.15680456 0.0108389612 0.015722679136914018 0 319 0 0.43477726 0.7708744 2.1257893169799549 1 320 1 1.20646763 0.9818366 0.026445122755562926 1 -321 0 +321 0 ? ? ? 0 322 0 -1.15350461 0.0109670116 0.015909453114236246 0 323 1 1.1870687 0.980549 0.028338400890888689 1 324 0 -1.23810339 0.008112156 0.01175109505221196 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -0.775208354 0.0414625444 0.061093287384022553 0 409 0 -1.11720467 0.0124781644 0.018115446186086449 0 410 0 -1.28200269 0.00693504466 0.010040008870103924 0 -411 0 +411 0 ? ? ? 
0 412 1 1.89956212 0.9984746 0.0022023701778286324 1 413 0 -0.9887064 0.01966903 0.028659194266170487 0 414 1 1.3937664 0.9906591 0.013539378350893604 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -1.266803 0.00732205063 0.010602349145536318 0 615 0 -0.994706035 0.0192570463 0.028053029788189736 0 616 0 -1.19740391 0.009379565 0.013595712679399167 0 -617 0 +617 0 ? ? ? 0 618 0 -1.15510464 0.0109047387 0.015818618982627604 0 619 0 -1.11280513 0.0126747517 0.018402674010350579 0 620 0 -1.19740391 0.009379565 0.013595712679399167 0 diff --git a/test/BaselineOutput/SingleRelease/SGD/BinarySGD-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleRelease/SGD/BinarySGD-TrainTest-breast-cancer.txt index 29a93c0326..da39a0132a 100644 --- a/test/BaselineOutput/SingleRelease/SGD/BinarySGD-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/SGD/BinarySGD-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 0.822916269 0.694855034 0.52521607106152413 1 21 1 1.02655149 0.736246765 0.441738705665011 1 22 0 -1.01600647 0.265806019 0.44576680836978433 0 -23 1 +23 1 ? ? ? 0 24 0 -1.03864312 0.261411875 0.43715802871945769 0 25 1 0.184996247 0.5461176 0.87271643290195555 1 26 0 -0.9708479 0.274711519 0.46337315957197994 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -0.7265403 0.325954378 0.56908185228017971 0 38 1 0.933466434 0.717778 0.47839033548395937 1 39 1 0.4073757 0.600458443 0.7358636922780708 1 -40 0 +40 0 ? ? ? 0 41 1 0.04465425 0.5111617 0.96814839425710286 1 42 1 1.43612671 0.8078541 0.30783330336923359 1 43 1 0.0479370356 0.511981964 0.96583510643055481 1 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -0.9885073 0.271207035 0.45641906112335939 0 137 0 -1.04836845 0.259538531 0.4335034307664255 0 138 0 -0.9260764 0.283721417 0.48140728969854896 0 -139 0 +139 0 ? ? ? 
0 140 0 -1.04836845 0.259538531 0.4335034307664255 0 141 0 -1.07586777 0.2542888 0.42331107075059288 0 142 1 0.373193622 0.592230439 0.75576945149404562 1 143 0 -0.7654944 0.317454576 0.55100303407297502 0 144 0 -1.04350591 0.2604741 0.43532739229387868 0 -145 0 +145 0 ? ? ? 0 146 1 0.2147963 0.553493559 0.85336156547352904 1 147 0 -0.9662212 0.275634348 0.46520995753133637 0 148 0 -0.318193555 0.421116054 0.78865394770237895 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 0.600466 0.6457629 0.63092349091318589 1 156 0 -0.9430999 0.2802746 0.47448152072334643 0 157 0 -1.01114392 0.266756058 0.44763484776839352 0 -158 0 +158 0 ? ? ? 0 159 1 1.87013483 0.866473854 0.20677187910596315 1 160 1 1.34225249 0.7928601 0.33486178660539723 1 161 0 -0.901281357 0.288787246 0.49164689849299364 0 162 0 -0.9836446 0.2721692 0.45832499621527728 0 163 0 -0.855002642 0.2983845 0.51124744948014056 0 -164 0 +164 0 ? ? ? 0 165 0 -0.8168132 0.306440562 0.5279085681868505 0 166 1 1.49839211 0.817334533 0.29100140459599977 1 167 1 0.8153703 0.6932527 0.5285468008024331 1 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 -0.08038044 0.4799157 0.9431826305616432 0 233 1 0.6980891 0.667763948 0.58258988829378422 1 234 0 -0.49562943 0.378568321 0.68633230772699549 0 -235 0 +235 0 ? ? ? 0 236 1 1.52862382 0.8218049 0.28313219549408758 1 237 1 0.9843018 0.727960944 0.45806704440223556 1 238 1 1.96605682 0.877186954 0.18904373922390869 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 1.97140193 0.8777616 0.18809893386755816 1 247 1 0.323040724 0.580065131 0.78571319608569612 1 248 0 -0.6818254 0.335854024 0.59042772056631165 0 -249 0 +249 0 ? ? ? 
0 250 0 -0.9754619 0.273793161 0.46154757804170599 0 251 1 0.9712138 0.725361347 0.46322822535916075 1 252 0 0.5616691 0.6368387 1.4613175226151991 1 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 0.354397058 0.587683439 0.76688885070680657 1 273 1 -0.237432361 0.4409192 1.1814138244518058 0 274 0 -0.9287806 0.28317216 0.48030142611403009 0 -275 0 +275 0 ? ? ? 0 276 0 -1.01600647 0.265806019 0.44576680836978433 0 277 0 -1.07100511 0.255211979 0.42509822607281988 0 278 0 -1.03864312 0.261411875 0.43715802871945769 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 1.4364171 0.8078992 0.30775283385408775 1 290 0 -1.10336709 0.249109536 0.4133256254437066 0 291 0 -1.03864312 0.261411875 0.43715802871945769 0 -292 1 +292 1 ? ? ? 0 293 1 0.6671076 0.6608552 0.59759395409472371 1 -294 0 +294 0 ? ? ? 0 295 1 1.17509961 0.764065564 0.38823165513968877 1 296 0 0.224430084 0.5558732 1.1709565135958244 1 -297 0 +297 0 ? ? ? 0 298 0 -0.7844506 0.313361466 0.54237727145415437 0 299 1 0.748945951 0.678949 0.55862488926125509 1 300 1 1.01898217 0.7347743 0.44462694496525584 1 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 0.4770298 0.617046237 0.69654949630747753 1 313 0 -1.10336709 0.249109536 0.4133256254437066 0 314 0 -1.09132075 0.251369655 0.41767456748452503 0 -315 0 +315 0 ? ? ? 0 316 1 0.5936177 0.6441948 0.63443112049115613 1 317 1 1.53676748 0.8229943 0.28104567061447505 1 318 0 -0.977696 0.2733492 0.46066585861057346 0 319 0 0.228058934 0.5567689 1.1738689621124481 1 320 1 0.7848048 0.6867147 0.54221722972478248 1 -321 0 +321 0 ? ? ? 
0 322 0 -0.9836446 0.2721692 0.45832499621527728 0 323 1 0.799038649 0.6897688 0.53581523979313106 1 324 0 -1.03864312 0.261411875 0.43715802871945769 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -0.6859037 0.334944934 0.58845429463916576 0 409 0 -0.9535757 0.2781663 0.47026158367983178 0 410 0 -1.07100511 0.255211979 0.42509822607281988 0 -411 0 +411 0 ? ? ? 0 412 1 1.353524 0.794705153 0.331508396641091 1 413 0 -0.8662151 0.296042472 0.50643970604072797 0 414 1 0.9272311 0.7165132 0.48093477882048502 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -1.06382155 0.256579816 0.42775023737254403 0 615 0 -0.863645554 0.296578258 0.50753816766843873 0 616 0 -1.01600647 0.265806019 0.44576680836978433 0 -617 0 +617 0 ? ? ? 0 618 0 -0.9885073 0.271207035 0.45641906112335939 0 619 0 -0.961007953 0.276676446 0.46728696355201749 0 620 0 -1.01600647 0.265806019 0.44576680836978433 0 diff --git a/test/BaselineOutput/SingleRelease/SavePipe/TestParquetPrimitiveDataTypes-Data.txt b/test/BaselineOutput/SingleRelease/SavePipe/TestParquetPrimitiveDataTypes-Data.txt index af1e19e1cc..85a3d35b4b 100644 --- a/test/BaselineOutput/SingleRelease/SavePipe/TestParquetPrimitiveDataTypes-Data.txt +++ b/test/BaselineOutput/SingleRelease/SavePipe/TestParquetPrimitiveDataTypes-Data.txt @@ -11,5 +11,5 @@ #@ col=string:TX:7 #@ } sbyte short int long bool DateTimeOffset Interval string - 1 "2018-09-01T19:53:18.2910000+00:00" "31.00:00:00.0010000" "" +-128 -32768 -2147483648 -9223372036854775808 1 "2018-09-01T19:53:18.2910000+00:00" "31.00:00:00.0010000" "" 127 32767 2147483647 9223372036854775807 0 "2018-09-01T19:53:18.3110000+00:00" "31.00:00:00.0010000" """""" diff --git a/test/BaselineOutput/SingleRelease/SymSGD/SymSGD-CV-breast-cancer.txt b/test/BaselineOutput/SingleRelease/SymSGD/SymSGD-CV-breast-cancer.txt index d0e7499c6d..cc1083e607 100644 --- a/test/BaselineOutput/SingleRelease/SymSGD/SymSGD-CV-breast-cancer.txt +++ 
b/test/BaselineOutput/SingleRelease/SymSGD/SymSGD-CV-breast-cancer.txt @@ -14,7 +14,7 @@ Instance Label Score Probability Log-loss Assigned 32 1 454.251282 1 0 1 35 0 -320.737427 0 0 0 37 0 -78.9787 5.011722E-35 0 0 -40 0 +40 0 ? ? ? 0 41 1 199.0091 1 0 1 44 1 656.8247 1 0 1 45 0 -322.804565 0 0 0 @@ -76,7 +76,7 @@ Instance Label Score Probability Log-loss Assigned 138 0 -289.415222 0 0 0 141 0 -344.61084 0 0 0 144 0 -320.737427 0 0 0 -145 0 +145 0 ? ? ? 0 147 0 -309.023651 0 0 0 150 0 -272.79837 0 0 0 151 1 249.55658 1 0 1 @@ -84,7 +84,7 @@ Instance Label Score Probability Log-loss Assigned 154 0 -349.126221 0 0 0 156 0 -227.212433 0 0 0 161 0 -274.6302 0 0 0 -164 0 +164 0 ? ? ? 0 167 1 283.316284 1 0 1 169 0 -331.047241 0 0 0 171 0 -301.379425 0 0 0 @@ -130,7 +130,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 1044.54126 1 0 1 247 1 409.237671 1 0 1 248 0 -221.818024 0 0 0 -249 0 +249 0 ? ? ? 0 250 0 -251.085815 0 0 0 252 0 307.68988 1 Infinity 1 254 1 728.536743 1 0 1 @@ -144,7 +144,7 @@ Instance Label Score Probability Log-loss Assigned 269 0 -301.379425 0 0 0 271 0 -283.3178 0 0 0 272 1 408.017578 1 0 1 -275 0 +275 0 ? ? ? 0 276 0 -316.222015 0 0 0 277 0 -325.252838 0 0 0 278 0 -301.379425 0 0 0 @@ -158,7 +158,7 @@ Instance Label Score Probability Log-loss Assigned 291 0 -301.379425 0 0 0 293 1 386.6949 1 0 1 296 0 139.220642 1 Infinity 1 -297 0 +297 0 ? ? ? 0 299 1 227.814941 1 0 1 300 1 407.6792 1 0 1 301 0 -301.379425 0 0 0 @@ -172,7 +172,7 @@ Instance Label Score Probability Log-loss Assigned 316 1 466.170166 1 0 1 317 1 736.0132 1 0 1 319 0 161.598083 1 Infinity 1 -321 0 +321 0 ? ? ? 0 323 1 388.03302 1 0 1 327 0 -325.252838 0 0 0 328 1 584.984 1 0 1 @@ -318,7 +318,7 @@ Instance Label Score Probability Log-loss Assigned 612 1 1115.01685 1 0 1 613 0 -169.23941 0 0 0 614 0 -292.156342 0 0 0 -617 0 +617 0 ? ? ? 
0 618 0 -311.7066 0 0 0 619 0 -307.1912 0 0 0 621 0 -15.8763733 1.27344038E-07 1.8371862313930792E-07 0 @@ -375,7 +375,7 @@ Instance Label Score Probability Log-loss Assigned 17 0 -643.057739 0 0 0 19 0 -668.631836 0 0 0 22 0 -540.900146 0 0 0 -23 1 +23 1 ? ? ? 0 24 0 -604.696655 0 0 0 26 0 -270.657074 0 0 0 27 0 -566.4742 0 0 0 @@ -425,7 +425,7 @@ Instance Label Score Probability Log-loss Assigned 134 0 -670.7141 0 0 0 135 0 -421.652374 0 0 0 136 0 -553.6872 0 0 0 -139 0 +139 0 ? ? ? 0 140 0 -451.529541 0 0 0 142 1 488.315338 1 0 1 143 0 -142.331116 0 0 0 @@ -435,7 +435,7 @@ Instance Label Score Probability Log-loss Assigned 153 0 -322.504425 0 0 0 155 1 1089.60254 1 0 1 157 0 -617.483643 0 0 0 -158 0 +158 0 ? ? ? 0 159 1 1923.50525 1 0 1 160 1 1494.5094 1 0 1 162 0 -630.2707 0 0 0 @@ -474,7 +474,7 @@ Instance Label Score Probability Log-loss Assigned 231 1 1347.16248 1 0 1 232 0 355.92923 1 Infinity 1 234 0 50.92752 1 Infinity 1 -235 0 +235 0 ? ? ? 0 236 1 1204.77161 1 0 1 238 1 2421.62354 1 0 1 243 0 -499.813416 0 0 0 @@ -496,8 +496,8 @@ Instance Label Score Probability Log-loss Assigned 286 1 1933.38391 1 0 1 287 0 -670.7141 0 0 0 289 1 1586.01746 1 0 1 -292 1 -294 0 +292 1 ? ? ? 0 +294 0 ? ? ? 0 295 1 906.5393 1 0 1 298 0 -764.4775 0 0 0 302 1 1682.504 1 0 1 @@ -506,7 +506,7 @@ Instance Label Score Probability Log-loss Assigned 307 0 -604.696655 0 0 0 310 0 -657.927 0 0 0 313 0 -425.9555 0 0 0 -315 0 +315 0 ? ? ? 0 318 0 -994.1385 0 0 0 320 1 276.6519 1 0 1 322 0 -630.2707 0 0 0 @@ -551,7 +551,7 @@ Instance Label Score Probability Log-loss Assigned 407 0 -515.32605 0 0 0 408 0 -106.253662 0 0 0 410 0 -515.32605 0 0 0 -411 0 +411 0 ? ? ? 
0 412 1 1142.7179 1 0 1 417 0 -515.32605 0 0 0 420 0 -151.036346 0 0 0 diff --git a/test/BaselineOutput/SingleRelease/SymSGD/SymSGD-TrainTest-breast-cancer.txt b/test/BaselineOutput/SingleRelease/SymSGD/SymSGD-TrainTest-breast-cancer.txt index 4a57fddad1..f3130d53ec 100644 --- a/test/BaselineOutput/SingleRelease/SymSGD/SymSGD-TrainTest-breast-cancer.txt +++ b/test/BaselineOutput/SingleRelease/SymSGD/SymSGD-TrainTest-breast-cancer.txt @@ -22,7 +22,7 @@ Instance Label Score Probability Log-loss Assigned 20 1 10.1497192 0.9999609 5.6411412351548271E-05 1 21 1 -61.521698 1.91190378E-27 88.757048641689195 0 22 0 -407.94165 0 0 0 -23 1 +23 1 ? ? ? 0 24 0 -413.829132 0 0 0 25 1 -111.690765 0 Infinity 0 26 0 -333.49762 0 0 0 @@ -39,7 +39,7 @@ Instance Label Score Probability Log-loss Assigned 37 0 -367.9438 0 0 0 38 1 125.049805 1 0 1 39 1 -120.420319 0 Infinity 0 -40 0 +40 0 ? ? ? 0 41 1 -214.114883 0 Infinity 0 42 1 86.67383 1 0 1 43 1 -299.954132 0 Infinity 0 @@ -138,13 +138,13 @@ Instance Label Score Probability Log-loss Assigned 136 0 -408.326935 0 0 0 137 0 -401.2836 0 0 0 138 0 -411.7511 0 0 0 -139 0 +139 0 ? ? ? 0 140 0 -401.2836 0 0 0 141 0 -400.898315 0 0 0 142 1 -108.134125 0 Infinity 0 143 0 -305.7808 0 0 0 144 0 -407.556366 0 0 0 -145 0 +145 0 ? ? ? 0 146 1 -205.1228 0 Infinity 0 147 0 -408.638123 0 0 0 148 0 -448.917847 0 0 0 @@ -157,13 +157,13 @@ Instance Label Score Probability Log-loss Assigned 155 1 39.9500122 1 0 1 156 0 -361.30127 0 0 0 157 0 -414.214417 0 0 0 -158 0 +158 0 ? ? ? 0 159 1 214.183228 1 0 1 160 1 120.047485 1 0 1 161 0 -401.219147 0 0 0 162 0 -414.5997 0 0 0 163 0 -282.1694 0 0 0 -164 0 +164 0 ? ? ? 0 165 0 -377.536072 0 0 0 166 1 123.761658 1 0 1 167 1 -9.15014648 0.000106192965 13.201024182527696 0 @@ -234,7 +234,7 @@ Instance Label Score Probability Log-loss Assigned 232 0 -256.504028 0 0 0 233 1 -84.9973755 1.21929513E-37 122.62529213957316 0 234 0 -275.7568 0 0 0 -235 0 +235 0 ? ? ? 
0 236 1 116.098267 1 0 1 237 1 26.0986938 1 0 1 238 1 370.027954 1 0 1 @@ -248,7 +248,7 @@ Instance Label Score Probability Log-loss Assigned 246 1 321.109131 1 0 1 247 1 -61.9207153 1.28284749E-27 89.332708892981643 0 248 0 -346.1557 0 0 0 -249 0 +249 0 ? ? ? 0 250 0 -354.643219 0 0 0 251 1 108.2807 1 0 1 252 0 -4.193939 0.0148625113 0.021603009492489122 0 @@ -274,7 +274,7 @@ Instance Label Score Probability Log-loss Assigned 272 1 -151.574524 0 Infinity 0 273 1 -303.688629 0 Infinity 0 274 0 -400.833862 0 0 0 -275 0 +275 0 ? ? ? 0 276 0 -407.94165 0 0 0 277 0 -407.171082 0 0 0 278 0 -413.829132 0 0 0 @@ -291,12 +291,12 @@ Instance Label Score Probability Log-loss Assigned 289 1 175.671082 1 0 1 290 0 -400.513 0 0 0 291 0 -413.829132 0 0 0 -292 1 +292 1 ? ? ? 0 293 1 51.4936523 1 0 1 -294 0 +294 0 ? ? ? 0 295 1 5.8543396 0.997140765 0.0041309123233919023 1 296 0 -173.148254 0 0 0 -297 0 +297 0 ? ? ? 0 298 0 -429.3664 0 0 0 299 1 -112.8385 0 Infinity 0 300 1 -114.377228 0 Infinity 0 @@ -314,13 +314,13 @@ Instance Label Score Probability Log-loss Assigned 312 1 -175.547363 0 Infinity 0 313 0 -400.513 0 0 0 314 0 -382.020966 0 0 0 -315 0 +315 0 ? ? ? 0 316 1 -56.5515747 2.753995E-25 81.586676422594735 0 317 1 168.155884 1 0 1 318 0 -489.2794 0 0 0 319 0 -232.038055 0 0 0 320 1 16.8273315 0.99999994 8.5991327994145617E-08 1 -321 0 +321 0 ? ? ? 0 322 0 -414.5997 0 0 0 323 1 72.79901 1 0 1 324 0 -413.829132 0 0 0 @@ -410,7 +410,7 @@ Instance Label Score Probability Log-loss Assigned 408 0 -278.598877 0 0 0 409 0 -411.365784 0 0 0 410 0 -407.171082 0 0 0 -411 0 +411 0 ? ? ? 0 412 1 21.296814 1 0 1 413 0 -418.794434 0 0 0 414 1 17.1500549 0.99999994 8.5991327994145617E-08 1 @@ -616,7 +616,7 @@ Instance Label Score Probability Log-loss Assigned 614 0 -382.40625 0 0 0 615 0 -415.175232 0 0 0 616 0 -407.94165 0 0 0 -617 0 +617 0 ? ? ? 
0 618 0 -408.326935 0 0 0 619 0 -408.712219 0 0 0 620 0 -407.94165 0 0 0 diff --git a/test/BaselineOutput/SingleRelease/Transform/Concat/Concat1.tsv b/test/BaselineOutput/SingleRelease/Transform/Concat/Concat1.tsv index 36b453f708..c548a80a00 100644 --- a/test/BaselineOutput/SingleRelease/Transform/Concat/Concat1.tsv +++ b/test/BaselineOutput/SingleRelease/Transform/Concat/Concat1.tsv @@ -7,13 +7,13 @@ #@ col=f4:R4:8-** #@ } float1 float1 float1 float4.age float4.fnlwgt float4.education-num float4.capital-gain float1 -25 25 25 25 226802 7 0 25 25 226802 7 0 0 40 0 25 -38 38 38 38 89814 9 0 38 38 89814 9 0 0 50 0 38 -28 28 28 28 336951 12 0 28 28 336951 12 0 0 40 1 28 -44 44 44 44 160323 10 7688 44 44 160323 10 7688 0 40 1 44 -18 18 18 18 103497 10 0 18 18 103497 10 0 0 30 0 18 -34 34 34 34 198693 6 0 34 34 198693 6 0 0 30 0 34 -29 29 29 29 227026 9 0 29 29 227026 9 0 0 40 0 29 -63 63 63 63 104626 15 3103 63 63 104626 15 3103 0 32 1 63 -24 24 24 24 369667 10 0 24 24 369667 10 0 0 40 0 24 -55 55 55 55 104996 4 0 55 55 104996 4 0 0 10 0 55 +25 25 25 25 226802 7 0 25 25 226802 7 0 0 40 ? 0 25 +38 38 38 38 89814 9 0 38 38 89814 9 0 0 50 ? 0 38 +28 28 28 28 336951 12 0 28 28 336951 12 0 0 40 ? 1 28 +44 44 44 44 160323 10 7688 44 44 160323 10 7688 0 40 ? 1 44 +18 18 18 18 103497 10 0 18 18 103497 10 0 0 30 ? 0 18 +34 34 34 34 198693 6 0 34 34 198693 6 0 0 30 ? 0 34 +29 29 29 29 227026 9 0 29 29 227026 9 0 0 40 ? 0 29 +63 63 63 63 104626 15 3103 63 63 104626 15 3103 0 32 ? 1 63 +24 24 24 24 369667 10 0 24 24 369667 10 0 0 40 ? 0 24 +55 55 55 55 104996 4 0 55 55 104996 4 0 0 10 ? 
0 55 diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/CoreBaseTestClass.cs b/test/Microsoft.ML.Core.Tests/UnitTests/CoreBaseTestClass.cs index 35859783ad..114d8c48b9 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/CoreBaseTestClass.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/CoreBaseTestClass.cs @@ -153,19 +153,19 @@ protected Func GetColumnComparer(IRow r1, IRow r2, int col, ColumnType typ switch (type.RawKind) { case DataKind.I1: - return GetComparerOne(r1, r2, col, (x, y) => x.RawValue == y.RawValue); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.U1: return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.I2: - return GetComparerOne(r1, r2, col, (x, y) => x.RawValue == y.RawValue); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.U2: return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.I4: - return GetComparerOne(r1, r2, col, (x, y) => x.RawValue == y.RawValue); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.U4: return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.I8: - return GetComparerOne(r1, r2, col, (x, y) => x.RawValue == y.RawValue); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.U8: return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.R4: @@ -176,15 +176,15 @@ protected Func GetColumnComparer(IRow r1, IRow r2, int col, ColumnType typ else return GetComparerOne(r1, r2, col, EqualWithEps); case DataKind.Text: - return GetComparerOne(r1, r2, col, DvText.Identical); + return GetComparerOne>(r1, r2, col, (a, b) => a.Span.SequenceEqual(b.Span)); case DataKind.Bool: - return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.TimeSpan: - return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x.Ticks == y.Ticks); case DataKind.DT: - return GetComparerOne(r1, r2, col, (x, y) => 
x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x.Ticks == y.Ticks); case DataKind.DZ: - return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); case DataKind.UG: return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); } @@ -196,19 +196,19 @@ protected Func GetColumnComparer(IRow r1, IRow r2, int col, ColumnType typ switch (type.ItemType.RawKind) { case DataKind.I1: - return GetComparerVec(r1, r2, col, size, (x, y) => x.RawValue == y.RawValue); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.U1: return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.I2: - return GetComparerVec(r1, r2, col, size, (x, y) => x.RawValue == y.RawValue); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.U2: return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.I4: - return GetComparerVec(r1, r2, col, size, (x, y) => x.RawValue == y.RawValue); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.U4: return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.I8: - return GetComparerVec(r1, r2, col, size, (x, y) => x.RawValue == y.RawValue); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.U8: return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.R4: @@ -219,15 +219,15 @@ protected Func GetColumnComparer(IRow r1, IRow r2, int col, ColumnType typ else return GetComparerVec(r1, r2, col, size, EqualWithEps); case DataKind.Text: - return GetComparerVec(r1, r2, col, size, DvText.Identical); + return GetComparerVec>(r1, r2, col, size, (a,b) => a.Span.SequenceEqual(b.Span)); case DataKind.Bool: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.TimeSpan: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, 
col, size, (x, y) => x.Ticks == y.Ticks); case DataKind.DT: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, col, size, (x, y) => x.Ticks == y.Ticks); case DataKind.DZ: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); case DataKind.UG: return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); } diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/DataTypes.cs b/test/Microsoft.ML.Core.Tests/UnitTests/DataTypes.cs new file mode 100644 index 0000000000..7ae2384203 --- /dev/null +++ b/test/Microsoft.ML.Core.Tests/UnitTests/DataTypes.cs @@ -0,0 +1,239 @@ +using System; +using System.IO; +using System.Linq; +using System.Text; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Data.Conversion; +using Xunit; +using Xunit.Abstractions; + +namespace Microsoft.ML.Runtime.RunTests +{ + public class DataTypesTest : TestDataViewBase + { + public DataTypesTest(ITestOutputHelper helper) + : base(helper) + { + } + + private readonly static Conversions _conv = Conversions.Instance; + + [Fact] + public void R4ToSBtoR4() + { + var r4ToSB = Conversions.Instance.GetStringConversion(NumberType.FromKind(DataKind.R4)); + + var txToR4 = Conversions.Instance.GetStandardConversion< ReadOnlyMemory, float>( + TextType.Instance, NumberType.FromKind(DataKind.R4), out bool identity2); + + Assert.NotNull(r4ToSB); + Assert.NotNull(txToR4); + + float fVal = float.NaN; + StringBuilder textFVal = default; + r4ToSB(ref fVal, ref textFVal); + + Assert.True("?" 
== textFVal.ToString()); + + fVal = 0; + var fValTX = textFVal.ToString().AsMemory(); + txToR4(ref fValTX, ref fVal); + + Assert.Equal(fVal, float.NaN); + } + + [Fact] + public void R8ToSBtoR8() + { + var r8ToSB = Conversions.Instance.GetStringConversion(NumberType.FromKind(DataKind.R8)); + + var txToR8 = Conversions.Instance.GetStandardConversion, double>( + TextType.Instance, NumberType.FromKind(DataKind.R8), out bool identity2); + + Assert.NotNull(r8ToSB); + Assert.NotNull(txToR8); + + double dVal = double.NaN; + StringBuilder textDVal = default; + r8ToSB(ref dVal, ref textDVal); + + Assert.True("?" == textDVal.ToString()); + + dVal = 0; + var dValTX = textDVal.ToString().AsMemory(); + txToR8(ref dValTX, ref dVal); + + Assert.Equal(dVal, double.NaN); + } + + [Fact] + public void TXToSByte() + { + var mapper = GetMapper, sbyte>(); + + Assert.NotNull(mapper); + + //1. sbyte.MinValue in text to sbyte. + sbyte minValue = sbyte.MinValue; + sbyte maxValue = sbyte.MaxValue; + ReadOnlyMemory src = minValue.ToString().AsMemory(); + sbyte dst = 0; + mapper(ref src, ref dst); + Assert.Equal(dst, minValue); + + //2. sbyte.MaxValue in text to sbyte. + src = maxValue.ToString().AsMemory(); + dst = 0; + mapper(ref src, ref dst); + Assert.Equal(dst, maxValue); + + //3. ERROR condition: sbyte.MinValue - 1 in text to sbyte. + src = (sbyte.MinValue - 1).ToString().AsMemory(); + dst = 0; + var ex = Assert.ThrowsAny(() => mapper(ref src, ref dst)); + Assert.Equal("Value could not be parsed from text to sbyte.", ex.Message); + + //4. ERROR condition: sbyte.MaxValue + 1 in text to sbyte. + src = (sbyte.MaxValue + 1).ToString().AsMemory(); + dst = 0; + ex = Assert.ThrowsAny(() => mapper(ref src, ref dst)); + Assert.Equal("Value could not be parsed from text to sbyte.", ex.Message); + + //5. Empty string in text to sbyte. 
+ src = default; + dst = -1; + mapper(ref src, ref dst); + Assert.Equal(default, dst); + } + + [Fact] + public void TXToShort() + { + var mapper = GetMapper, short>(); + + Assert.NotNull(mapper); + + //1. short.MinValue in text to short. + short minValue = short.MinValue; + short maxValue = short.MaxValue; + ReadOnlyMemory src = minValue.ToString().AsMemory(); + short dst = 0; + mapper(ref src, ref dst); + Assert.Equal(dst, minValue); + + //2. short.MaxValue in text to short. + src = maxValue.ToString().AsMemory(); + dst = 0; + mapper(ref src, ref dst); + Assert.Equal(dst, maxValue); + + //3. ERROR condition: short.MinValue - 1 in text to short. + src = (minValue - 1).ToString().AsMemory(); + dst = 0; + var ex = Assert.ThrowsAny(() => mapper(ref src, ref dst)); + Assert.Equal("Value could not be parsed from text to short.", ex.Message); + + //4. ERROR condition: short.MaxValue + 1 in text to short. + src = (maxValue + 1).ToString().AsMemory(); + dst = 0; + ex = Assert.ThrowsAny(() => mapper(ref src, ref dst)); + Assert.Equal("Value could not be parsed from text to short.", ex.Message); + + //5. Empty value in text to short. + src = default; + dst = -1; + mapper(ref src, ref dst); + Assert.Equal(default, dst); + } + + [Fact] + public void TXToInt() + { + var mapper = GetMapper, int>(); + + Assert.NotNull(mapper); + + //1. int.MinValue in text to int. + int minValue = int.MinValue; + int maxValue = int.MaxValue; + ReadOnlyMemory src = minValue.ToString().AsMemory(); + int dst = 0; + mapper(ref src, ref dst); + Assert.Equal(dst, minValue); + + //2. int.MaxValue in text to int. + src = maxValue.ToString().AsMemory(); + dst = 0; + mapper(ref src, ref dst); + Assert.Equal(dst, maxValue); + + //3. ERROR condition: int.MinValue - 1 in text to int. + src = ((long)minValue - 1).ToString().AsMemory(); + dst = 0; + var ex = Assert.ThrowsAny(() => mapper(ref src, ref dst)); + Assert.Equal("Value could not be parsed from text to int.", ex.Message); + + //4. 
ERROR condition: int.MaxValue + 1 in text to int. + src = ((long)maxValue + 1).ToString().AsMemory(); + dst = 0; + ex = Assert.ThrowsAny(() => mapper(ref src, ref dst)); + Assert.Equal("Value could not be parsed from text to int.", ex.Message); + + //5. Empty value in text to int. + src = default; + dst = -1; + mapper(ref src, ref dst); + Assert.Equal(default, dst); + } + + [Fact] + public void TXToLong() + { + var mapper = GetMapper, long>(); + + Assert.NotNull(mapper); + + //1. long.MinValue in text to long. + var minValue = long.MinValue; + var maxValue = long.MaxValue; + ReadOnlyMemory src = minValue.ToString().AsMemory(); + var dst = default(long); + mapper(ref src, ref dst); + Assert.Equal(dst, minValue); + + //2. long.MaxValue in text to long. + src = maxValue.ToString().AsMemory(); + dst = 0; + mapper(ref src, ref dst); + Assert.Equal(dst, maxValue); + + //3. long.MinValue - 1 in text to long. + src = (minValue - 1).ToString().AsMemory(); + dst = 0; + mapper(ref src, ref dst); + Assert.Equal(dst, (long)minValue - 1); + + //4. ERROR condition: long.MaxValue + 1 in text to long. + src = ((ulong)maxValue + 1).ToString().AsMemory(); + dst = 0; + var ex = Assert.ThrowsAny(() => mapper(ref src, ref dst)); + Assert.Equal("Value could not be parsed from text to long.", ex.Message); + + //5. Empty value in text to long. 
+ src = default; + dst = -1; + mapper(ref src, ref dst); + Assert.Equal(default, dst); + } + + public ValueMapper GetMapper() + { + Assert.True(typeof(TDst).TryGetDataKind(out DataKind dstDataKind)); + + return Conversions.Instance.GetStandardConversion( + TextType.Instance, NumberType.FromKind(dstDataKind), out bool identity); + } + } +} + + diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/DvTypes.cs b/test/Microsoft.ML.Core.Tests/UnitTests/DvTypes.cs deleted file mode 100644 index a3f5d8231b..0000000000 --- a/test/Microsoft.ML.Core.Tests/UnitTests/DvTypes.cs +++ /dev/null @@ -1,133 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.Data; -using Xunit; -namespace Microsoft.ML.Runtime.RunTests -{ - public sealed class DvTypeTests - { - [Fact] - public void TestComparableDvInt4() - { - const int count = 100; - - var rand = RandomUtils.Create(42); - var values = new DvInt4[2 * count]; - for (int i = 0; i < count; i++) - { - var v = values[i] = rand.Next(); - values[values.Length - i - 1] = v; - } - - // Assign two NA's at random. 
- int iv1 = rand.Next(values.Length); - int iv2 = rand.Next(values.Length - 1); - if (iv2 >= iv1) - iv2++; - values[iv1] = DvInt4.NA; - values[iv2] = DvInt4.NA; - Array.Sort(values); - - Assert.True(values[0].IsNA); - Assert.True(values[1].IsNA); - Assert.True(!values[2].IsNA); - - Assert.True((values[0] == values[1]).IsNA); - Assert.True((values[0] != values[1]).IsNA); - Assert.True((values[0] <= values[1]).IsNA); - Assert.True(values[0].Equals(values[1])); - Assert.True(values[0].CompareTo(values[1]) == 0); - - Assert.True((values[1] == values[2]).IsNA); - Assert.True((values[1] != values[2]).IsNA); - Assert.True((values[1] <= values[2]).IsNA); - Assert.True(!values[1].Equals(values[2])); - Assert.True(values[1].CompareTo(values[2]) < 0); - - for (int i = 3; i < values.Length; i++) - { - DvBool eq = values[i - 1] == values[i]; - DvBool ne = values[i - 1] != values[i]; - DvBool le = values[i - 1] <= values[i]; - bool feq = values[i - 1].Equals(values[i]); - int cmp = values[i - 1].CompareTo(values[i]); - Assert.True(!eq.IsNA); - Assert.True(!ne.IsNA); - Assert.True(eq.IsTrue == ne.IsFalse); - Assert.True(le.IsTrue); - Assert.True(feq == eq.IsTrue); - Assert.True(cmp <= 0); - Assert.True(feq == (cmp == 0)); - } - } - - [Fact] - public void TestComparableDvText() - { - const int count = 100; - - var rand = RandomUtils.Create(42); - var chars = new char[2000]; - for (int i = 0; i < chars.Length; i++) - chars[i] = (char)rand.Next(128); - var str = new string(chars); - - var values = new DvText[2 * count]; - for (int i = 0; i < count; i++) - { - int len = rand.Next(20); - int ich = rand.Next(str.Length - len + 1); - var v = values[i] = new DvText(str, ich, ich + len); - values[values.Length - i - 1] = v; - } - - // Assign two NA's and an empty at random. 
- int iv1 = rand.Next(values.Length); - int iv2 = rand.Next(values.Length - 1); - if (iv2 >= iv1) - iv2++; - int iv3 = rand.Next(values.Length - 2); - if (iv3 >= iv1) - iv3++; - if (iv3 >= iv2) - iv3++; - - values[iv1] = DvText.NA; - values[iv2] = DvText.NA; - values[iv3] = DvText.Empty; - Array.Sort(values); - - Assert.True(values[0].IsNA); - Assert.True(values[1].IsNA); - Assert.True(values[2].IsEmpty); - - Assert.True((values[0] == values[1]).IsNA); - Assert.True((values[0] != values[1]).IsNA); - Assert.True(values[0].Equals(values[1])); - Assert.True(values[0].CompareTo(values[1]) == 0); - - Assert.True((values[1] == values[2]).IsNA); - Assert.True((values[1] != values[2]).IsNA); - Assert.True(!values[1].Equals(values[2])); - Assert.True(values[1].CompareTo(values[2]) < 0); - - for (int i = 3; i < values.Length; i++) - { - DvBool eq = values[i - 1] == values[i]; - DvBool ne = values[i - 1] != values[i]; - bool feq = values[i - 1].Equals(values[i]); - int cmp = values[i - 1].CompareTo(values[i]); - Assert.True(!eq.IsNA); - Assert.True(!ne.IsNA); - Assert.True(eq.IsTrue == ne.IsFalse); - Assert.True(feq == eq.IsTrue); - Assert.True(cmp <= 0); - Assert.True(feq == (cmp == 0)); - } - } - } -} diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs index ede98dd425..b3d7068041 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs @@ -6,6 +6,7 @@ using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.TestFramework; +using System; using System.Collections.Generic; using System.Linq; using Xunit; @@ -344,11 +345,10 @@ public void TestCrossValidationMacro() using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol || col == isWeightedCol)) { var getter = cursor.GetGetter(metricCol); - var foldGetter = cursor.GetGetter(foldCol); - var isWeightedGetter = 
cursor.GetGetter(isWeightedCol); - DvText fold = default; - DvBool isWeighted = default; - + var foldGetter = cursor.GetGetter>(foldCol); + ReadOnlyMemory fold = default; + var isWeightedGetter = cursor.GetGetter(isWeightedCol); + bool isWeighted = default; double avg = 0; double weightedAvg = 0; for (int w = 0; w < 2; w++) @@ -361,9 +361,9 @@ public void TestCrossValidationMacro() else getter(ref avg); foldGetter(ref fold); - Assert.True(fold.EqualsStr("Average")); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold)); isWeightedGetter(ref isWeighted); - Assert.True(isWeighted.IsTrue == (w == 1)); + Assert.True(isWeighted == (w == 1)); // Get the standard deviation. b = cursor.MoveNext(); @@ -371,13 +371,13 @@ public void TestCrossValidationMacro() double stdev = 0; getter(ref stdev); foldGetter(ref fold); - Assert.True(fold.EqualsStr("Standard Deviation")); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold)); if (w == 1) Assert.Equal(1.584696, stdev, 6); else Assert.Equal(1.385165, stdev, 6); isWeightedGetter(ref isWeighted); - Assert.True(isWeighted.IsTrue == (w == 1)); + Assert.True(isWeighted == (w == 1)); } double sum = 0; double weightedSum = 0; @@ -394,9 +394,9 @@ public void TestCrossValidationMacro() weightedSum += val; else sum += val; - Assert.True(fold.EqualsStr("Fold " + f)); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold)); isWeightedGetter(ref isWeighted); - Assert.True(isWeighted.IsTrue == (w == 1)); + Assert.True(isWeighted == (w == 1)); } } Assert.Equal(weightedAvg, weightedSum / 2); @@ -460,16 +460,16 @@ public void TestCrossValidationMacroWithMultiClass() using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol)) { var getter = cursor.GetGetter(metricCol); - var foldGetter = cursor.GetGetter(foldCol); - DvText fold = default; + var foldGetter = cursor.GetGetter>(foldCol); + ReadOnlyMemory fold = default; - // Get the verage. + // Get the average. 
b = cursor.MoveNext(); Assert.True(b); double avg = 0; getter(ref avg); foldGetter(ref fold); - Assert.True(fold.EqualsStr("Average")); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold)); // Get the standard deviation. b = cursor.MoveNext(); @@ -477,7 +477,7 @@ public void TestCrossValidationMacroWithMultiClass() double stdev = 0; getter(ref stdev); foldGetter(ref fold); - Assert.True(fold.EqualsStr("Standard Deviation")); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold)); Assert.Equal(0.025, stdev, 3); double sum = 0; @@ -489,7 +489,7 @@ public void TestCrossValidationMacroWithMultiClass() getter(ref val); foldGetter(ref fold); sum += val; - Assert.True(fold.EqualsStr("Fold " + f)); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold)); } Assert.Equal(avg, sum / 2); b = cursor.MoveNext(); @@ -504,15 +504,15 @@ public void TestCrossValidationMacroWithMultiClass() Assert.True(b); var type = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, countCol); Assert.True(type != null && type.ItemType.IsText && type.VectorSize == 10); - var slotNames = default(VBuffer); + var slotNames = default(VBuffer>); schema.GetMetadata(MetadataUtils.Kinds.SlotNames, countCol, ref slotNames); - Assert.True(slotNames.Values.Select((s, i) => s.EqualsStr(i.ToString())).All(x => x)); + Assert.True(slotNames.Values.Select((s, i) => ReadOnlyMemoryUtils.EqualsStr(i.ToString(), s)).All(x => x)); using (var curs = confusion.GetRowCursor(col => true)) { var countGetter = curs.GetGetter>(countCol); - var foldGetter = curs.GetGetter(foldCol); + var foldGetter = curs.GetGetter>(foldCol); var confCount = default(VBuffer); - var foldIndex = default(DvText); + var foldIndex = default(ReadOnlyMemory); int rowCount = 0; var foldCur = "Fold 0"; while (curs.MoveNext()) @@ -520,7 +520,7 @@ public void TestCrossValidationMacroWithMultiClass() countGetter(ref confCount); foldGetter(ref foldIndex); rowCount++; - Assert.True(foldIndex.EqualsStr(foldCur)); + 
Assert.True(ReadOnlyMemoryUtils.EqualsStr(foldCur, foldIndex)); if (rowCount == 10) { rowCount = 0; @@ -598,11 +598,11 @@ public void TestCrossValidationMacroMultiClassWithWarnings() Assert.True(b); using (var cursor = warnings.GetRowCursor(col => col == warningCol)) { - var getter = cursor.GetGetter(warningCol); + var getter = cursor.GetGetter>(warningCol); b = cursor.MoveNext(); Assert.True(b); - var warning = default(DvText); + var warning = default(ReadOnlyMemory); getter(ref warning); Assert.Contains("test instances with class values not seen in the training set.", warning.ToString()); b = cursor.MoveNext(); @@ -673,8 +673,8 @@ public void TestCrossValidationMacroWithStratification() using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol)) { var getter = cursor.GetGetter(metricCol); - var foldGetter = cursor.GetGetter(foldCol); - DvText fold = default; + var foldGetter = cursor.GetGetter>(foldCol); + ReadOnlyMemory fold = default; // Get the verage. b = cursor.MoveNext(); @@ -682,7 +682,7 @@ public void TestCrossValidationMacroWithStratification() double avg = 0; getter(ref avg); foldGetter(ref fold); - Assert.True(fold.EqualsStr("Average")); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold)); // Get the standard deviation. 
b = cursor.MoveNext(); @@ -690,7 +690,7 @@ public void TestCrossValidationMacroWithStratification() double stdev = 0; getter(ref stdev); foldGetter(ref fold); - Assert.True(fold.EqualsStr("Standard Deviation")); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold)); Assert.Equal(0.00485, stdev, 5); double sum = 0; @@ -702,7 +702,7 @@ public void TestCrossValidationMacroWithStratification() getter(ref val); foldGetter(ref fold); sum += val; - Assert.True(fold.EqualsStr("Fold " + f)); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold)); } Assert.Equal(avg, sum / 2); b = cursor.MoveNext(); @@ -781,8 +781,8 @@ public void TestCrossValidationMacroWithNonDefaultNames() using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol)) { var getter = cursor.GetGetter>(metricCol); - var foldGetter = cursor.GetGetter(foldCol); - DvText fold = default; + var foldGetter = cursor.GetGetter>(foldCol); + ReadOnlyMemory fold = default; // Get the verage. b = cursor.MoveNext(); @@ -790,7 +790,7 @@ public void TestCrossValidationMacroWithNonDefaultNames() var avg = default(VBuffer); getter(ref avg); foldGetter(ref fold); - Assert.True(fold.EqualsStr("Average")); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold)); // Get the standard deviation. 
b = cursor.MoveNext(); @@ -798,7 +798,7 @@ public void TestCrossValidationMacroWithNonDefaultNames() var stdev = default(VBuffer); getter(ref stdev); foldGetter(ref fold); - Assert.True(fold.EqualsStr("Standard Deviation")); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold)); Assert.Equal(2.462, stdev.Values[0], 3); Assert.Equal(2.763, stdev.Values[1], 3); Assert.Equal(3.273, stdev.Values[2], 3); @@ -813,7 +813,7 @@ public void TestCrossValidationMacroWithNonDefaultNames() getter(ref val); foldGetter(ref fold); sumBldr.AddFeatures(0, ref val); - Assert.True(fold.EqualsStr("Fold " + f)); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold)); } var sum = default(VBuffer); sumBldr.GetResult(ref sum); @@ -827,12 +827,12 @@ public void TestCrossValidationMacroWithNonDefaultNames() Assert.True(data.Schema.TryGetColumnIndex("Instance", out int nameCol)); using (var cursor = data.GetRowCursor(col => col == nameCol)) { - var getter = cursor.GetGetter(nameCol); + var getter = cursor.GetGetter>(nameCol); while (cursor.MoveNext()) { - DvText name = default; + ReadOnlyMemory name = default; getter(ref name); - Assert.Subset(new HashSet() { new DvText("Private"), new DvText("?"), new DvText("Federal-gov") }, new HashSet() { name }); + Assert.Subset(new HashSet() { "Private", "?", "Federal-gov" }, new HashSet() { name.ToString() }); if (cursor.Position > 4) break; } diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index 1b4e233542..d674d99c61 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -825,9 +825,9 @@ public void EntryPointPipelineEnsemble() Assert.True(hasScoreCol, "Data scored with binary ensemble does not have a score column"); var type = binaryScored.Schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.ScoreColumnKind, scoreIndex); Assert.True(type != null && type.IsText, 
"Binary ensemble scored data does not have correct type of metadata."); - var kind = default(DvText); + var kind = default(ReadOnlyMemory); binaryScored.Schema.GetMetadata(MetadataUtils.Kinds.ScoreColumnKind, scoreIndex, ref kind); - Assert.True(kind.EqualsStr(MetadataUtils.Const.ScoreColumnKind.BinaryClassification), + Assert.True(ReadOnlyMemoryUtils.EqualsStr(MetadataUtils.Const.ScoreColumnKind.BinaryClassification, kind), $"Binary ensemble scored data column type should be '{MetadataUtils.Const.ScoreColumnKind.BinaryClassification}', but is instead '{kind}'"); hasScoreCol = regressionScored.Schema.TryGetColumnIndex(MetadataUtils.Const.ScoreValueKind.Score, out scoreIndex); @@ -835,7 +835,7 @@ public void EntryPointPipelineEnsemble() type = regressionScored.Schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.ScoreColumnKind, scoreIndex); Assert.True(type != null && type.IsText, "Regression ensemble scored data does not have correct type of metadata."); regressionScored.Schema.GetMetadata(MetadataUtils.Kinds.ScoreColumnKind, scoreIndex, ref kind); - Assert.True(kind.EqualsStr(MetadataUtils.Const.ScoreColumnKind.Regression), + Assert.True(ReadOnlyMemoryUtils.EqualsStr(MetadataUtils.Const.ScoreColumnKind.Regression, kind), $"Regression ensemble scored data column type should be '{MetadataUtils.Const.ScoreColumnKind.Regression}', but is instead '{kind}'"); hasScoreCol = anomalyScored.Schema.TryGetColumnIndex(MetadataUtils.Const.ScoreValueKind.Score, out scoreIndex); @@ -843,7 +843,7 @@ public void EntryPointPipelineEnsemble() type = anomalyScored.Schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.ScoreColumnKind, scoreIndex); Assert.True(type != null && type.IsText, "Anomaly detection ensemble scored data does not have correct type of metadata."); anomalyScored.Schema.GetMetadata(MetadataUtils.Kinds.ScoreColumnKind, scoreIndex, ref kind); - Assert.True(kind.EqualsStr(MetadataUtils.Const.ScoreColumnKind.AnomalyDetection), + 
Assert.True(ReadOnlyMemoryUtils.EqualsStr(MetadataUtils.Const.ScoreColumnKind.AnomalyDetection, kind), $"Anomaly detection ensemble scored data column type should be '{MetadataUtils.Const.ScoreColumnKind.AnomalyDetection}', but is instead '{kind}'"); var modelPath = DeleteOutputPath("SavePipe", "PipelineEnsembleModel.zip"); @@ -975,13 +975,13 @@ public void EntryPointPipelineEnsembleText() InputFile = inputFile }).Data; - ValueMapper labelToBinary = - (ref DvText src, ref DvBool dst) => + ValueMapper, bool> labelToBinary = + (ref ReadOnlyMemory src, ref bool dst) => { - if (src.EqualsStr("Sport")) - dst = DvBool.True; + if (ReadOnlyMemoryUtils.EqualsStr("Sport", src)) + dst = true; else - dst = DvBool.False; + dst = false; }; dataView = LambdaColumnMapper.Create(Env, "TextToBinaryLabel", dataView, "Label", "Label", TextType.Instance, BoolType.Instance, labelToBinary); @@ -1535,16 +1535,16 @@ public void EntryPointTextToKeyToText() { using (var cursor = loader.GetRowCursor(col => true)) { - DvText cat = default(DvText); - DvText catValue = default(DvText); + ReadOnlyMemory cat = default; + ReadOnlyMemory catValue = default; uint catKey = 0; bool success = loader.Schema.TryGetColumnIndex("Cat", out int catCol); Assert.True(success); - var catGetter = cursor.GetGetter(catCol); + var catGetter = cursor.GetGetter>(catCol); success = loader.Schema.TryGetColumnIndex("CatValue", out int catValueCol); Assert.True(success); - var catValueGetter = cursor.GetGetter(catValueCol); + var catValueGetter = cursor.GetGetter>(catValueCol); success = loader.Schema.TryGetColumnIndex("Key", out int keyCol); Assert.True(success); var keyGetter = cursor.GetGetter(keyCol); @@ -1968,7 +1968,6 @@ public void EntryPointConvert() { "Transforms.ColumnTypeConverter", "Transforms.ColumnTypeConverter", - "Transforms.ColumnTypeConverter", }, new[] { @@ -1984,7 +1983,7 @@ public void EntryPointConvert() { 'Name': 'Feat', 'Source': 'FT', - 'Type': 'I1' + 'Type': 'R4' }, { 'Name': 'Key1', @@ -1994,18 
+1993,11 @@ public void EntryPointConvert() ]", @"'Column': [ { - 'Name': 'Ints', + 'Name': 'Doubles', 'Source': 'Feat' } ], - 'Type': 'I4'", - @"'Column': [ - { - 'Name': 'Floats', - 'Source': 'Ints' - } - ], - 'Type': 'Num'", + 'Type': 'R8'", }); } @@ -3606,18 +3598,18 @@ public void EntryPointPrepareLabelConvertPredictedLabel() { using (var cursor = loader.GetRowCursor(col => true)) { - DvText predictedLabel = default(DvText); + ReadOnlyMemory predictedLabel = default; var success = loader.Schema.TryGetColumnIndex("PredictedLabel", out int predictedLabelCol); Assert.True(success); - var predictedLabelGetter = cursor.GetGetter(predictedLabelCol); + var predictedLabelGetter = cursor.GetGetter>(predictedLabelCol); while (cursor.MoveNext()) { predictedLabelGetter(ref predictedLabel); - Assert.True(predictedLabel.EqualsStr("Iris-setosa") - || predictedLabel.EqualsStr("Iris-versicolor") - || predictedLabel.EqualsStr("Iris-virginica")); + Assert.True(ReadOnlyMemoryUtils.EqualsStr("Iris-setosa", predictedLabel) + || ReadOnlyMemoryUtils.EqualsStr("Iris-versicolor", predictedLabel) + || ReadOnlyMemoryUtils.EqualsStr("Iris-virginica", predictedLabel)); } } } diff --git a/test/Microsoft.ML.FSharp.Tests/SmokeTests.fs b/test/Microsoft.ML.FSharp.Tests/SmokeTests.fs index b0921cc2e3..f4688c8016 100644 --- a/test/Microsoft.ML.FSharp.Tests/SmokeTests.fs +++ b/test/Microsoft.ML.FSharp.Tests/SmokeTests.fs @@ -128,6 +128,7 @@ module SmokeTest1 = Assert.Equal(predictionResults, [ false; true; true ]) module SmokeTest2 = + open System [] type SentimentData = @@ -199,7 +200,7 @@ module SmokeTest3 = type SentimentData() = [] - member val SentimentText = "" with get, set + member val SentimentText = "".AsMemory() with get, set [] member val Sentiment = 0.0 with get, set @@ -253,9 +254,9 @@ module SmokeTest3 = let model = pipeline.Train() let predictions = - [ SentimentData(SentimentText = "This is a gross exaggeration. Nobody is setting a kangaroo court. 
There was a simple addition.") - SentimentData(SentimentText = "Sort of ok") - SentimentData(SentimentText = "Joe versus the Volcano Coffee Company is a great film.") ] + [ SentimentData(SentimentText = "This is a gross exaggeration. Nobody is setting a kangaroo court. There was a simple addition.".AsMemory()) + SentimentData(SentimentText = "Sort of ok".AsMemory()) + SentimentData(SentimentText = "Joe versus the Volcano Coffee Company is a great film.".AsMemory()) ] |> model.Predict let predictionResults = [ for p in predictions -> p.Sentiment ] diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs index a6ea639d99..4de917921b 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs @@ -655,23 +655,23 @@ private void CombineAndTestTreeEnsembles(IDataView idv, IPredictorModel[] fastTr { var scoreGetter = curs.GetGetter(scoreCol); var probGetter = curs.GetGetter(probCol); - var predGetter = curs.GetGetter(predCol); + var predGetter = curs.GetGetter(predCol); var scoreGetters = new ValueGetter[3]; var probGetters = new ValueGetter[3]; - var predGetters = new ValueGetter[3]; + var predGetters = new ValueGetter[3]; for (int i = 0; i < 3; i++) { scoreGetters[i] = cursors[i].GetGetter(scoreColArray[i]); probGetters[i] = cursors[i].GetGetter(probColArray[i]); - predGetters[i] = cursors[i].GetGetter(predColArray[i]); + predGetters[i] = cursors[i].GetGetter(predColArray[i]); } float score = 0; float prob = 0; - var pred = default(DvBool); + bool pred = default; var scores = new float[3]; var probs = new float[3]; - var preds = new DvBool[3]; + var preds = new bool[3]; while (curs.MoveNext()) { scoreGetter(ref score); @@ -686,7 +686,7 @@ private void CombineAndTestTreeEnsembles(IDataView idv, IPredictorModel[] fastTr } Assert.Equal(score, 0.4 * scores.Sum() / 3, 5); Assert.Equal(prob, 1 / (1 + Math.Exp(-score)), 6); - Assert.True(pred.IsTrue == 
score > 0); + Assert.True(pred == score > 0); } } } diff --git a/test/Microsoft.ML.Predictor.Tests/TestTransposer.cs b/test/Microsoft.ML.Predictor.Tests/TestTransposer.cs index b5b53677ab..4ce87f070b 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestTransposer.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestTransposer.cs @@ -149,19 +149,19 @@ public void TransposerTest() ArrayDataViewBuilder builder = new ArrayDataViewBuilder(Env); // A is to check the splitting of a sparse-ish column. - var dataA = GenerateHelper(rowCount, 0.1, rgen, () => (DvInt4)rgen.Next(), 50, 5, 10, 15); - dataA[rowCount / 2] = new VBuffer(50, 0, null, null); // Coverage for the null vbuffer case. + var dataA = GenerateHelper(rowCount, 0.1, rgen, () => (int)rgen.Next(), 50, 5, 10, 15); + dataA[rowCount / 2] = new VBuffer(50, 0, null, null); // Coverage for the null vbuffer case. builder.AddColumn("A", NumberType.I4, dataA); // B is to check the splitting of a dense-ish column. builder.AddColumn("B", NumberType.R8, GenerateHelper(rowCount, 0.8, rgen, rgen.NextDouble, 50, 0, 25, 49)); // C is to just have some column we do nothing with. - builder.AddColumn("C", NumberType.I2, GenerateHelper(rowCount, 0.1, rgen, () => (DvInt2)1, 30, 3, 10, 24)); + builder.AddColumn("C", NumberType.I2, GenerateHelper(rowCount, 0.1, rgen, () => (short)1, 30, 3, 10, 24)); // D is to check some column we don't have to split because it's sufficiently small. builder.AddColumn("D", NumberType.R8, GenerateHelper(rowCount, 0.1, rgen, rgen.NextDouble, 3, 1)); // E is to check a sparse scalar column. builder.AddColumn("E", NumberType.U4, GenerateHelper(rowCount, 0.1, rgen, () => (uint)rgen.Next(int.MinValue, int.MaxValue))); // F is to check a dense-ish scalar column. 
- builder.AddColumn("F", NumberType.I4, GenerateHelper(rowCount, 0.8, rgen, () => (DvInt4)rgen.Next())); + builder.AddColumn("F", NumberType.I4, GenerateHelper(rowCount, 0.8, rgen, () => rgen.Next())); IDataView view = builder.GetDataView(); @@ -182,11 +182,11 @@ public void TransposerTest() } // Check the contents Assert.Null(trans.TransposeSchema.GetSlotType(2)); // C check to see that it's not transposable. - TransposeCheckHelper(view, 0, trans); // A check. + TransposeCheckHelper(view, 0, trans); // A check. TransposeCheckHelper(view, 1, trans); // B check. TransposeCheckHelper(view, 3, trans); // D check. TransposeCheckHelper(view, 4, trans); // E check. - TransposeCheckHelper(view, 5, trans); // F check. + TransposeCheckHelper(view, 5, trans); // F check. } // Force save. Recheck columns that would have previously been passthrough columns. @@ -201,7 +201,7 @@ public void TransposerTest() Assert.Null(trans.TransposeSchema.GetSlotType(2)); TransposeCheckHelper(view, 3, trans); // D check. TransposeCheckHelper(view, 4, trans); // E check. - TransposeCheckHelper(view, 5, trans); // F check. + TransposeCheckHelper(view, 5, trans); // F check. } } @@ -214,19 +214,19 @@ public void TransposerSaverLoaderTest() ArrayDataViewBuilder builder = new ArrayDataViewBuilder(Env); // A is to check the splitting of a sparse-ish column. - var dataA = GenerateHelper(rowCount, 0.1, rgen, () => (DvInt4)rgen.Next(), 50, 5, 10, 15); - dataA[rowCount / 2] = new VBuffer(50, 0, null, null); // Coverage for the null vbuffer case. + var dataA = GenerateHelper(rowCount, 0.1, rgen, () => (int)rgen.Next(), 50, 5, 10, 15); + dataA[rowCount / 2] = new VBuffer(50, 0, null, null); // Coverage for the null vbuffer case. builder.AddColumn("A", NumberType.I4, dataA); // B is to check the splitting of a dense-ish column. builder.AddColumn("B", NumberType.R8, GenerateHelper(rowCount, 0.8, rgen, rgen.NextDouble, 50, 0, 25, 49)); // C is to just have some column we do nothing with. 
- builder.AddColumn("C", NumberType.I2, GenerateHelper(rowCount, 0.1, rgen, () => (DvInt2)1, 30, 3, 10, 24)); + builder.AddColumn("C", NumberType.I2, GenerateHelper(rowCount, 0.1, rgen, () => (short)1, 30, 3, 10, 24)); // D is to check some column we don't have to split because it's sufficiently small. builder.AddColumn("D", NumberType.R8, GenerateHelper(rowCount, 0.1, rgen, rgen.NextDouble, 3, 1)); // E is to check a sparse scalar column. builder.AddColumn("E", NumberType.U4, GenerateHelper(rowCount, 0.1, rgen, () => (uint)rgen.Next(int.MinValue, int.MaxValue))); // F is to check a dense-ish scalar column. - builder.AddColumn("F", NumberType.I4, GenerateHelper(rowCount, 0.8, rgen, () => (DvInt4)rgen.Next())); + builder.AddColumn("F", NumberType.I4, GenerateHelper(rowCount, 0.8, rgen, () => (int)rgen.Next())); IDataView view = builder.GetDataView(); @@ -241,12 +241,12 @@ public void TransposerSaverLoaderTest() // First check whether this as an IDataView yields the same values. CheckSameValues(view, loader); - TransposeCheckHelper(view, 0, loader); // A + TransposeCheckHelper(view, 0, loader); // A TransposeCheckHelper(view, 1, loader); // B - TransposeCheckHelper(view, 2, loader); // C + TransposeCheckHelper(view, 2, loader); // C TransposeCheckHelper(view, 3, loader); // D TransposeCheckHelper(view, 4, loader); // E - TransposeCheckHelper(view, 5, loader); // F + TransposeCheckHelper(view, 5, loader); // F Done(); } diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs index 369750c844..8d7fb18cd4 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
// See the LICENSE file in the project root for more information. @@ -80,12 +80,11 @@ public void SimpleTextLoaderCopyColumnsTest() // Next actually inspect the data. using (var cursor = textData.GetRowCursor(c => true)) { - var labelGetter = cursor.GetGetter(labelIdx); - var textGetter = cursor.GetGetter(textIdx); + var textGetter = cursor.GetGetter>(textIdx); var numericFeaturesGetter = cursor.GetGetter>(numericFeaturesIdx); - - DvBool labelVal = default; - DvText textVal = default; + ReadOnlyMemory textVal = default; + var labelGetter = cursor.GetGetter(labelIdx); + bool labelVal = default; VBuffer numVal = default; void CheckValuesSame(bool bl, string tx, float v0, float v1, float v2) @@ -93,9 +92,8 @@ void CheckValuesSame(bool bl, string tx, float v0, float v1, float v2) labelGetter(ref labelVal); textGetter(ref textVal); numericFeaturesGetter(ref numVal); - - Assert.Equal((DvBool)bl, labelVal); - Assert.Equal(new DvText(tx), textVal); + Assert.True(tx.AsSpan().SequenceEqual(textVal.Span)); + Assert.Equal((bool)bl, labelVal); Assert.Equal(3, numVal.Length); Assert.Equal(v0, numVal.GetItemOrDefault(0)); Assert.Equal(v1, numVal.GetItemOrDefault(1)); @@ -159,13 +157,13 @@ public void AssertStaticKeys() var counted = new MetaCounted(); // We'll test a few things here. First, the case where the key-value metadata is text. - var metaValues1 = new VBuffer(3, new[] { new DvText("a"), new DvText("b"), new DvText("c") }); + var metaValues1 = new VBuffer>(3, new[] { "a".AsMemory(), "b".AsMemory(), "c".AsMemory() }); var meta1 = RowColumnUtils.GetColumn(MetadataUtils.Kinds.KeyValues, new VectorType(TextType.Instance, 3), ref metaValues1); uint value1 = 2; var col1 = RowColumnUtils.GetColumn("stay", new KeyType(DataKind.U4, 0, 3), ref value1, RowColumnUtils.GetRow(counted, meta1)); // Next the case where those values are ints. 
- var metaValues2 = new VBuffer(3, new DvInt4[] { 1, 2, 3, 4 }); + var metaValues2 = new VBuffer(3, new int[] { 1, 2, 3, 4 }); var meta2 = RowColumnUtils.GetColumn(MetadataUtils.Kinds.KeyValues, new VectorType(NumberType.I4, 4), ref metaValues2); var value2 = new VBuffer(2, 0, null, null); var col2 = RowColumnUtils.GetColumn("awhile", new VectorType(new KeyType(DataKind.U1, 2, 4), 2), ref value2, RowColumnUtils.GetRow(counted, meta2)); diff --git a/test/Microsoft.ML.TestFramework/DataPipe/Parquet.cs b/test/Microsoft.ML.TestFramework/DataPipe/Parquet.cs index f5be433b3e..26f265bf9e 100644 --- a/test/Microsoft.ML.TestFramework/DataPipe/Parquet.cs +++ b/test/Microsoft.ML.TestFramework/DataPipe/Parquet.cs @@ -2,19 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using Float = System.Single; - using System; -using System.Collections.Generic; -using System.IO; -using Microsoft.ML.Runtime.CommandLine; -using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.Data.IO; -using Microsoft.ML.Runtime.Internal.Utilities; -using Microsoft.ML.Runtime.Model; -using Microsoft.ML.Runtime.TextAnalytics; using Xunit; -using System.Runtime.InteropServices; namespace Microsoft.ML.Runtime.RunTests { @@ -33,7 +22,8 @@ public void TestParquetPrimitiveDataTypes() public void TestParquetNull() { string pathData = GetDataPath(@"Parquet", "test-null.parquet"); - TestCore(pathData, false, new[] { "loader=Parquet{bigIntDates=+}" }, forceDense: true); + var ex = Assert.Throws(() => TestCore(pathData, false, new[] { "loader=Parquet{bigIntDates=+}" }, forceDense: true)); + Assert.Equal("Nullable object must have a value.", ex.Message); Done(); } } diff --git a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs index 40b29a498e..0ec0e35341 100644 --- a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs +++ 
b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs @@ -612,8 +612,8 @@ protected bool CheckSameSchemas(ISchema sch1, ISchema sch2, bool exactTypes = tr protected bool CheckMetadataNames(string kind, int size, ISchema sch1, ISchema sch2, int col, bool exactTypes, bool mustBeText) { - var names1 = default(VBuffer); - var names2 = default(VBuffer); + var names1 = default(VBuffer>); + var names2 = default(VBuffer>); var t1 = sch1.GetMetadataTypeOrNull(kind, col); var t2 = sch2.GetMetadataTypeOrNull(kind, col); @@ -654,7 +654,7 @@ protected bool CheckMetadataNames(string kind, int size, ISchema sch1, ISchema s sch1.GetMetadata(kind, col, ref names1); sch2.GetMetadata(kind, col, ref names2); - if (!CompareVec(ref names1, ref names2, size, DvText.Identical)) + if (!CompareVec(ref names1, ref names2, size, (a, b) => a.Span.SequenceEqual(b.Span))) { Fail("Different {0} metadata values", kind); return Failed(); @@ -662,7 +662,7 @@ protected bool CheckMetadataNames(string kind, int size, ISchema sch1, ISchema s return true; } - protected bool CheckMetadataCallFailure(string kind, ISchema sch, int col, ref VBuffer names) + protected bool CheckMetadataCallFailure(string kind, ISchema sch, int col, ref VBuffer> names) { try { @@ -1059,19 +1059,19 @@ protected Func GetColumnComparer(IRow r1, IRow r2, int col, ColumnType typ switch (type.RawKind) { case DataKind.I1: - return GetComparerOne(r1, r2, col, (x, y) => x.RawValue == y.RawValue); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.U1: return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.I2: - return GetComparerOne(r1, r2, col, (x, y) => x.RawValue == y.RawValue); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.U2: return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.I4: - return GetComparerOne(r1, r2, col, (x, y) => x.RawValue == y.RawValue); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.U4: return GetComparerOne(r1, 
r2, col, (x, y) => x == y); case DataKind.I8: - return GetComparerOne(r1, r2, col, (x, y) => x.RawValue == y.RawValue); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.U8: return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.R4: @@ -1082,15 +1082,15 @@ protected Func GetColumnComparer(IRow r1, IRow r2, int col, ColumnType typ else return GetComparerOne(r1, r2, col, EqualWithEps); case DataKind.Text: - return GetComparerOne(r1, r2, col, DvText.Identical); + return GetComparerOne>(r1, r2, col, (a ,b) => a.Span.SequenceEqual(b.Span)); case DataKind.Bool: - return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.TimeSpan: - return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.DT: - return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.DZ: - return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); case DataKind.UG: return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); case (DataKind)0: @@ -1105,19 +1105,19 @@ protected Func GetColumnComparer(IRow r1, IRow r2, int col, ColumnType typ switch (type.ItemType.RawKind) { case DataKind.I1: - return GetComparerVec(r1, r2, col, size, (x, y) => x.RawValue == y.RawValue); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.U1: return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.I2: - return GetComparerVec(r1, r2, col, size, (x, y) => x.RawValue == y.RawValue); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.U2: return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.I4: - return GetComparerVec(r1, r2, col, size, (x, y) => x.RawValue == y.RawValue); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case 
DataKind.U4: return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.I8: - return GetComparerVec(r1, r2, col, size, (x, y) => x.RawValue == y.RawValue); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.U8: return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.R4: @@ -1128,15 +1128,15 @@ protected Func GetColumnComparer(IRow r1, IRow r2, int col, ColumnType typ else return GetComparerVec(r1, r2, col, size, EqualWithEps); case DataKind.Text: - return GetComparerVec(r1, r2, col, size, DvText.Identical); + return GetComparerVec>(r1, r2, col, size, (a, b) => a.Span.SequenceEqual(b.Span)); case DataKind.Bool: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.TimeSpan: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.DT: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.DZ: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); case DataKind.UG: return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); } diff --git a/test/Microsoft.ML.TestFramework/TestCommandBase.cs b/test/Microsoft.ML.TestFramework/TestCommandBase.cs index 5338b1e2db..a0c72a15ab 100644 --- a/test/Microsoft.ML.TestFramework/TestCommandBase.cs +++ b/test/Microsoft.ML.TestFramework/TestCommandBase.cs @@ -2031,15 +2031,16 @@ public void CommandTrainingBinaryFieldAwareFactorizationMachineWithValidationAnd } [Fact] - public void DataTypes() + public void Datatypes() { - //Skip for linux because DATE/TIME format is different. 
- if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) - return; - string idvPath = GetDataPath("datatypes.idv"); + OutputPath intermediateData = CreateOutputPath("intermediateDatatypes.idv"); OutputPath textOutputPath = CreateOutputPath("datatypes.txt"); TestCore("savedata", idvPath, "loader=binary", "saver=text", textOutputPath.Arg("dout")); + _step++; + TestCore("savedata", idvPath, "loader=binary", "saver=binary", intermediateData.ArgOnly("dout")); + _step++; + TestCore("savedata", intermediateData.Path, "loader=binary", "saver=text", textOutputPath.Arg("dout")); Done(); } } diff --git a/test/Microsoft.ML.TestFramework/TestSparseDataView.cs b/test/Microsoft.ML.TestFramework/TestSparseDataView.cs index ad21acb4ac..5c8301fb7a 100644 --- a/test/Microsoft.ML.TestFramework/TestSparseDataView.cs +++ b/test/Microsoft.ML.TestFramework/TestSparseDataView.cs @@ -1,9 +1,10 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; +using System; using Xunit; using Xunit.Abstractions; @@ -34,11 +35,11 @@ private class SparseExample public void SparseDataView() { GenericSparseDataView(new[] { 1f, 2f, 3f }, new[] { 1f, 10f, 100f }); - GenericSparseDataView(new DvInt4[] { 1, 2, 3 }, new DvInt4[] { 1, 10, 100 }); - GenericSparseDataView(new DvBool[] { true, true, true }, new DvBool[] { false, false, false }); + GenericSparseDataView(new int[] { 1, 2, 3 }, new int[] { 1, 10, 100 }); + GenericSparseDataView(new bool[] { true, true, true }, new bool[] { false, false, false }); GenericSparseDataView(new double[] { 1, 2, 3 }, new double[] { 1, 10, 100 }); - GenericSparseDataView(new DvText[] { new DvText("a"), new DvText("b"), new DvText("c") }, - new DvText[] { new DvText("aa"), new DvText("bb"), new DvText("cc") }); + GenericSparseDataView(new ReadOnlyMemory[] { "a".AsMemory(), "b".AsMemory(), "c".AsMemory() }, + new ReadOnlyMemory[] { "aa".AsMemory(), "bb".AsMemory(), "cc".AsMemory() }); } private void GenericSparseDataView(T[] v1, T[] v2) @@ -76,11 +77,11 @@ private void GenericSparseDataView(T[] v1, T[] v2) public void DenseDataView() { GenericDenseDataView(new[] { 1f, 2f, 3f }, new[] { 1f, 10f, 100f }); - GenericDenseDataView(new DvInt4[] { 1, 2, 3 }, new DvInt4[] { 1, 10, 100 }); - GenericDenseDataView(new DvBool[] { true, true, true }, new DvBool[] { false, false, false }); + GenericDenseDataView(new int[] { 1, 2, 3 }, new int[] { 1, 10, 100 }); + GenericDenseDataView(new bool[] { true, true, true }, new bool[] { false, false, false }); GenericDenseDataView(new double[] { 1, 2, 3 }, new double[] { 1, 10, 100 }); - GenericDenseDataView(new DvText[] { new DvText("a"), new DvText("b"), new DvText("c") }, - new DvText[] { new DvText("aa"), new DvText("bb"), new DvText("cc") }); + GenericDenseDataView(new ReadOnlyMemory[] { "a".AsMemory(), "b".AsMemory(), "c".AsMemory() }, + new ReadOnlyMemory[] { "aa".AsMemory(), "bb".AsMemory(), 
"cc".AsMemory() }); } private void GenericDenseDataView(T[] v1, T[] v2) diff --git a/test/Microsoft.ML.Tests/CSharpCodeGen.cs b/test/Microsoft.ML.Tests/CSharpCodeGen.cs index 5bf4edf26b..d16a9924f1 100644 --- a/test/Microsoft.ML.Tests/CSharpCodeGen.cs +++ b/test/Microsoft.ML.Tests/CSharpCodeGen.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. using Microsoft.ML.Runtime.RunTests; -using Microsoft.ML.TestFramework; using System.IO; using Xunit; using Xunit.Abstractions; diff --git a/test/Microsoft.ML.Tests/CollectionDataSourceTests.cs b/test/Microsoft.ML.Tests/CollectionDataSourceTests.cs index 3749676dcd..2e3b71ab01 100644 --- a/test/Microsoft.ML.Tests/CollectionDataSourceTests.cs +++ b/test/Microsoft.ML.Tests/CollectionDataSourceTests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
@@ -94,7 +94,7 @@ public void CanSuccessfullyEnumerated() using (var cursor = data.GetRowCursor((a => true))) { var IDGetter = cursor.GetGetter(0); - var TextGetter = cursor.GetGetter(1); + var TextGetter = cursor.GetGetter>(1); Assert.True(cursor.MoveNext()); @@ -102,7 +102,7 @@ public void CanSuccessfullyEnumerated() IDGetter(ref ID); Assert.Equal(1, ID); - DvText Text = new DvText(); + ReadOnlyMemory Text = new ReadOnlyMemory(); TextGetter(ref Text); Assert.Equal("1", Text.ToString()); @@ -112,7 +112,7 @@ public void CanSuccessfullyEnumerated() IDGetter(ref ID); Assert.Equal(2, ID); - Text = new DvText(); + Text = new ReadOnlyMemory(); TextGetter(ref Text); Assert.Equal("2", Text.ToString()); @@ -122,7 +122,7 @@ public void CanSuccessfullyEnumerated() IDGetter(ref ID); Assert.Equal(3, ID); - Text = new DvText(); + Text = new ReadOnlyMemory(); TextGetter(ref Text); Assert.Equal("3", Text.ToString()); @@ -294,29 +294,13 @@ public class ConversionSimpleClass public float fFloat; public double fDouble; public bool fBool; - public string fString; - } - - public class ConversionNullalbeClass - { - public int? fInt; - public uint? fuInt; - public short? fShort; - public ushort? fuShort; - public sbyte? fsByte; - public byte? fByte; - public long? fLong; - public ulong? fuLong; - public float? fFloat; - public double? fDouble; - public bool? fBool; - public string fString; + public string fString=""; } public bool CompareObjectValues(object x, object y, Type type) { - // By default behaviour for DvText is to be empty string, while for string is null. - // So if we do roundtrip string-> DvText -> string all null string become empty strings. + // By default behaviour for ReadOnlyMemory is to be empty string, while for string is null. + // So if we do roundtrip string-> ReadOnlyMemory -> string all null string become empty strings. // Therefore replace all null values to empty string if field is string. 
if (type == typeof(string) && x == null) x = ""; @@ -434,56 +418,6 @@ public void RoundTripConversionWithBasicTypes() new ConversionSimpleClass() }; - var dataNullable = new List - { - new ConversionNullalbeClass() - { - fInt = int.MaxValue - 1, - fuInt = uint.MaxValue - 1, - fBool = true, - fsByte = sbyte.MaxValue - 1, - fByte = byte.MaxValue - 1, - fDouble = double.MaxValue - 1, - fFloat = float.MaxValue - 1, - fLong = long.MaxValue - 1, - fuLong = ulong.MaxValue - 1, - fShort = short.MaxValue - 1, - fuShort = ushort.MaxValue - 1, - fString = "ha" - }, - new ConversionNullalbeClass() - { - fInt = int.MaxValue, - fuInt = uint.MaxValue, - fBool = true, - fsByte = sbyte.MaxValue, - fByte = byte.MaxValue, - fDouble = double.MaxValue, - fFloat = float.MaxValue, - fLong = long.MaxValue, - fuLong = ulong.MaxValue, - fShort = short.MaxValue, - fuShort = ushort.MaxValue, - fString = "ooh" - }, - new ConversionNullalbeClass() - { - fInt = int.MinValue + 1, - fuInt = uint.MinValue, - fBool = false, - fsByte = sbyte.MinValue + 1, - fByte = byte.MinValue, - fDouble = double.MinValue + 1, - fFloat = float.MinValue + 1, - fLong = long.MinValue + 1, - fuLong = ulong.MinValue, - fShort = short.MinValue + 1, - fuShort = ushort.MinValue, - fString = "" - }, - new ConversionNullalbeClass() - }; - using (var env = new ConsoleEnvironment()) { var dataView = ComponentCreation.CreateDataView(env, data); @@ -494,15 +428,6 @@ public void RoundTripConversionWithBasicTypes() Assert.True(CompareThroughReflection(enumeratorSimple.Current, originalEnumerator.Current)); } Assert.True(!enumeratorSimple.MoveNext() && !originalEnumerator.MoveNext()); - - dataView = ComponentCreation.CreateDataView(env, dataNullable); - var enumeratorNullable = dataView.AsEnumerable(env, false).GetEnumerator(); - var originalNullableEnumerator = dataNullable.GetEnumerator(); - while (enumeratorNullable.MoveNext() && originalNullableEnumerator.MoveNext()) - { - 
Assert.True(CompareThroughReflection(enumeratorNullable.Current, originalNullableEnumerator.Current)); - } - Assert.True(!enumeratorNullable.MoveNext() && !originalNullableEnumerator.MoveNext()); } } @@ -542,38 +467,6 @@ public void ConversionExceptionsBehavior() } } - public class ConversionLossMinValueClass - { - public int? fInt; - public long? fLong; - public short? fShort; - public sbyte? fSByte; - } - - [Fact] - public void ConversionMinValueToNullBehavior() - { - using (var env = new ConsoleEnvironment()) - { - - var data = new List - { - new ConversionLossMinValueClass() { fSByte = null, fInt = null, fLong = null, fShort = null }, - new ConversionLossMinValueClass() { fSByte = sbyte.MinValue, fInt = int.MinValue, fLong = long.MinValue, fShort = short.MinValue } - }; - foreach (var field in typeof(ConversionLossMinValueClass).GetFields()) - { - var dataView = ComponentCreation.CreateDataView(env, data); - var enumerator = dataView.AsEnumerable(env, false).GetEnumerator(); - while (enumerator.MoveNext()) - { - Assert.True(enumerator.Current.fInt == null && enumerator.Current.fLong == null && - enumerator.Current.fSByte == null && enumerator.Current.fShort == null); - } - } - } - } - public class ConversionLossMinValueClassProperties { private int? _fInt; @@ -586,30 +479,6 @@ public class ConversionLossMinValueClassProperties public long? 
LongProp { get { return _fLong; } set { _fLong = value; } } } - [Fact] - public void ConversionMinValueToNullBehaviorProperties() - { - using (var env = new ConsoleEnvironment()) - { - - var data = new List - { - new ConversionLossMinValueClassProperties() { SByteProp = null, IntProp = null, LongProp = null, ShortProp = null }, - new ConversionLossMinValueClassProperties() { SByteProp = sbyte.MinValue, IntProp = int.MinValue, LongProp = long.MinValue, ShortProp = short.MinValue } - }; - foreach (var field in typeof(ConversionLossMinValueClassProperties).GetFields()) - { - var dataView = ComponentCreation.CreateDataView(env, data); - var enumerator = dataView.AsEnumerable(env, false).GetEnumerator(); - while (enumerator.MoveNext()) - { - Assert.True(enumerator.Current.IntProp == null && enumerator.Current.LongProp == null && - enumerator.Current.SByteProp == null && enumerator.Current.ShortProp == null); - } - } - } - } - public class ClassWithConstField { public const string ConstString = "N"; @@ -625,7 +494,6 @@ public void ClassWithConstFieldsConversion() { new ClassWithConstField(){ fInt=1, fString ="lala" }, new ClassWithConstField(){ fInt=-1, fString ="" }, - new ClassWithConstField(){ fInt=0, fString =null } }; using (var env = new ConsoleEnvironment()) @@ -654,7 +522,6 @@ public void ClassWithMixOfFieldsAndPropertiesConversion() { new ClassWithMixOfFieldsAndProperties(){ IntProp=1, fString ="lala" }, new ClassWithMixOfFieldsAndProperties(){ IntProp=-1, fString ="" }, - new ClassWithMixOfFieldsAndProperties(){ IntProp=0, fString =null } }; using (var env = new ConsoleEnvironment()) @@ -744,7 +611,6 @@ public void ClassWithInheritedPropertiesConversion() { new ClassWithInheritedProperties(){ IntProp=1, StringProp ="lala", LongProp=17, ByteProp=3 }, new ClassWithInheritedProperties(){ IntProp=-1, StringProp ="", LongProp=2, ByteProp=4 }, - new ClassWithInheritedProperties(){ IntProp=0, StringProp =null, LongProp=18, ByteProp=5 } }; using (var env = new 
ConsoleEnvironment()) @@ -774,22 +640,6 @@ public class ClassWithArrays public bool[] fBool; } - public class ClassWithNullableArrays - { - public string[] fString; - public int?[] fInt; - public uint?[] fuInt; - public short?[] fShort; - public ushort?[] fuShort; - public sbyte?[] fsByte; - public byte?[] fByte; - public long?[] fLong; - public ulong?[] fuLong; - public float?[] fFloat; - public double?[] fDouble; - public bool?[] fBool; - } - [Fact] public void RoundTripConversionWithArrays() { @@ -801,7 +651,7 @@ public void RoundTripConversionWithArrays() fInt = new int[3] { 0, 1, 2 }, fFloat = new float[3] { -0.99f, 0f, 0.99f }, fString = new string[2] { "hola", "lola" }, - fBool = new bool[2] { true, false }, + fByte = new byte[3] { 0, 124, 255 }, fDouble = new double[3] { -1, 0, 1 }, fLong = new long[] { 0, 1, 2 }, @@ -815,26 +665,6 @@ public void RoundTripConversionWithArrays() new ClassWithArrays() }; - var nullableData = new List - { - new ClassWithNullableArrays() - { - fInt = new int?[3] { null, -1, 1 }, - fFloat = new float?[3] { -0.99f, null, 0.99f }, - fString = new string[2] { null, "" }, - fBool = new bool?[3] { true, null, false }, - fByte = new byte?[4] { 0, 125, null, 255 }, - fDouble = new double?[3] { -1, null, 1 }, - fLong = new long?[] { null, -1, 1 }, - fsByte = new sbyte?[3] { -127, 127, null }, - fShort = new short?[3] { 0, null, 32767 }, - fuInt = new uint?[4] { null, 42, 0, uint.MaxValue }, - fuLong = new ulong?[3] { ulong.MaxValue, null, 0 }, - fuShort = new ushort?[3] { 0, null, ushort.MaxValue } - }, - new ClassWithNullableArrays() { fInt = new int?[3] { -2, 1, 0 }, fFloat = new float?[3] { 0.99f, 0f, -0.99f }, fString = new string[2] { "lola", "hola" } }, - new ClassWithNullableArrays() - }; using (var env = new ConsoleEnvironment()) { @@ -846,15 +676,6 @@ public void RoundTripConversionWithArrays() Assert.True(CompareThroughReflection(enumeratorSimple.Current, originalEnumerator.Current)); } Assert.True(!enumeratorSimple.MoveNext() 
&& !originalEnumerator.MoveNext()); - - var nullableDataView = ComponentCreation.CreateDataView(env, nullableData); - var enumeratorNullable = nullableDataView.AsEnumerable(env, false).GetEnumerator(); - var originalNullalbleEnumerator = nullableData.GetEnumerator(); - while (enumeratorNullable.MoveNext() && originalNullalbleEnumerator.MoveNext()) - { - Assert.True(CompareThroughReflection(enumeratorNullable.Current, originalNullalbleEnumerator.Current)); - } - Assert.True(!enumeratorNullable.MoveNext() && !originalNullalbleEnumerator.MoveNext()); } } public class ClassWithArrayProperties @@ -885,35 +706,6 @@ public class ClassWithArrayProperties public bool[] BoolProp { get { return _fBool; } set { _fBool = value; } } } - public class ClassWithNullableArrayProperties - { - private string[] _fString; - private int?[] _fInt; - private uint?[] _fuInt; - private short?[] _fShort; - private ushort?[] _fuShort; - private sbyte?[] _fsByte; - private byte?[] _fByte; - private long?[] _fLong; - private ulong?[] _fuLong; - private float?[] _fFloat; - private double?[] _fDouble; - private bool?[] _fBool; - - public string[] StringProp { get { return _fString; } set { _fString = value; } } - public int?[] IntProp { get { return _fInt; } set { _fInt = value; } } - public uint?[] UIntProp { get { return _fuInt; } set { _fuInt = value; } } - public short?[] ShortProp { get { return _fShort; } set { _fShort = value; } } - public ushort?[] UShortProp { get { return _fuShort; } set { _fuShort = value; } } - public sbyte?[] SByteProp { get { return _fsByte; } set { _fsByte = value; } } - public byte?[] ByteProp { get { return _fByte; } set { _fByte = value; } } - public long?[] LongProp { get { return _fLong; } set { _fLong = value; } } - public ulong?[] ULongProp { get { return _fuLong; } set { _fuLong = value; } } - public float?[] SingleProp { get { return _fFloat; } set { _fFloat = value; } } - public double?[] DoubleProp { get { return _fDouble; } set { _fDouble = value; } } - 
public bool?[] BoolProp { get { return _fBool; } set { _fBool = value; } } - } - [Fact] public void RoundTripConversionWithArrayPropertiess() { @@ -939,27 +731,6 @@ public void RoundTripConversionWithArrayPropertiess() new ClassWithArrayProperties() }; - var nullableData = new List - { - new ClassWithNullableArrayProperties() - { - IntProp = new int?[3] { null, -1, 1 }, - SingleProp = new float?[3] { -0.99f, null, 0.99f }, - StringProp = new string[2] { null, "" }, - BoolProp = new bool?[3] { true, null, false }, - ByteProp = new byte?[4] { 0, 125, null, 255 }, - DoubleProp = new double?[3] { -1, null, 1 }, - LongProp = new long?[] { null, -1, 1 }, - SByteProp = new sbyte?[3] { -127, 127, null }, - ShortProp = new short?[3] { 0, null, 32767 }, - UIntProp = new uint?[4] { null, 42, 0, uint.MaxValue }, - ULongProp = new ulong?[3] { ulong.MaxValue, null, 0 }, - UShortProp = new ushort?[3] { 0, null, ushort.MaxValue } - }, - new ClassWithNullableArrayProperties() { IntProp = new int?[3] { -2, 1, 0 }, SingleProp = new float?[3] { 0.99f, 0f, -0.99f }, StringProp = new string[2] { "lola", "hola" } }, - new ClassWithNullableArrayProperties() - }; - using (var env = new ConsoleEnvironment()) { var dataView = ComponentCreation.CreateDataView(env, data); @@ -970,15 +741,6 @@ public void RoundTripConversionWithArrayPropertiess() Assert.True(CompareThroughReflection(enumeratorSimple.Current, originalEnumerator.Current)); } Assert.True(!enumeratorSimple.MoveNext() && !originalEnumerator.MoveNext()); - - var nullableDataView = ComponentCreation.CreateDataView(env, nullableData); - var enumeratorNullable = nullableDataView.AsEnumerable(env, false).GetEnumerator(); - var originalNullalbleEnumerator = nullableData.GetEnumerator(); - while (enumeratorNullable.MoveNext() && originalNullalbleEnumerator.MoveNext()) - { - Assert.True(CompareThroughReflection(enumeratorNullable.Current, originalNullalbleEnumerator.Current)); - } - Assert.True(!enumeratorNullable.MoveNext() && 
!originalNullalbleEnumerator.MoveNext()); } } diff --git a/test/Microsoft.ML.Tests/CopyColumnEstimatorTests.cs b/test/Microsoft.ML.Tests/CopyColumnEstimatorTests.cs index 958e7052d4..a94c9dd727 100644 --- a/test/Microsoft.ML.Tests/CopyColumnEstimatorTests.cs +++ b/test/Microsoft.ML.Tests/CopyColumnEstimatorTests.cs @@ -147,14 +147,14 @@ void TestMetadataCopy() var result = transformer.Transform(term); result.Schema.TryGetColumnIndex("T", out int termIndex); result.Schema.TryGetColumnIndex("T1", out int copyIndex); - var names1 = default(VBuffer); - var names2 = default(VBuffer); + var names1 = default(VBuffer>); + var names2 = default(VBuffer>); var type1 = result.Schema.GetColumnType(termIndex); int size = type1.ItemType.IsKey ? type1.ItemType.KeyCount : -1; var type2 = result.Schema.GetColumnType(copyIndex); result.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, termIndex, ref names1); result.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, copyIndex, ref names2); - Assert.True(CompareVec(ref names1, ref names2, size, DvText.Identical)); + Assert.True(CompareVec(ref names1, ref names2, size, (a, b) => a.Span.SequenceEqual(b.Span))); } } @@ -171,16 +171,16 @@ private void ValidateCopyColumnTransformer(IDataView result) { using (var cursor = result.GetRowCursor(x => true)) { - DvInt4 avalue = 0; - DvInt4 bvalue = 0; - DvInt4 dvalue = 0; - DvInt4 evalue = 0; - DvInt4 fvalue = 0; - var aGetter = cursor.GetGetter(0); - var bGetter = cursor.GetGetter(1); - var dGetter = cursor.GetGetter(3); - var eGetter = cursor.GetGetter(4); - var fGetter = cursor.GetGetter(5); + int avalue = 0; + int bvalue = 0; + int dvalue = 0; + int evalue = 0; + int fvalue = 0; + var aGetter = cursor.GetGetter(0); + var bGetter = cursor.GetGetter(1); + var dGetter = cursor.GetGetter(3); + var eGetter = cursor.GetGetter(4); + var fGetter = cursor.GetGetter(5); while (cursor.MoveNext()) { aGetter(ref avalue); diff --git a/test/Microsoft.ML.Tests/ImagesTests.cs 
b/test/Microsoft.ML.Tests/ImagesTests.cs index d4ef47b2a9..adc229e328 100644 --- a/test/Microsoft.ML.Tests/ImagesTests.cs +++ b/test/Microsoft.ML.Tests/ImagesTests.cs @@ -7,7 +7,7 @@ using Microsoft.ML.Runtime.ImageAnalytics; using Microsoft.ML.Runtime.Model; using Microsoft.ML.Runtime.RunTests; -using Microsoft.ML.TestFramework; +using System; using System.Drawing; using System.IO; using System.Linq; @@ -103,8 +103,8 @@ public void TestSaveImages() cropped.Schema.TryGetColumnIndex("ImageCropped", out int cropBitmapColumn); using (var cursor = cropped.GetRowCursor((x) => true)) { - var pathGetter = cursor.GetGetter(pathColumn); - DvText path = default; + var pathGetter = cursor.GetGetter>(pathColumn); + ReadOnlyMemory path = default; var bitmapCropGetter = cursor.GetGetter(cropBitmapColumn); Bitmap bitmap = default; while (cursor.MoveNext()) diff --git a/test/Microsoft.ML.Tests/LearningPipelineTests.cs b/test/Microsoft.ML.Tests/LearningPipelineTests.cs index 265c6201c0..a459456a03 100644 --- a/test/Microsoft.ML.Tests/LearningPipelineTests.cs +++ b/test/Microsoft.ML.Tests/LearningPipelineTests.cs @@ -94,7 +94,7 @@ public class Data public class Prediction { [ColumnName("PredictedLabel")] - public DvBool PredictedLabel; + public bool PredictedLabel; } [Fact] @@ -137,36 +137,6 @@ public void BooleanLabelPipeline() var model = pipeline.Train(); } - public class NullableBooleanLabelData - { - [ColumnName("Features")] - [VectorType(2)] - public float[] Features; - - [ColumnName("Label")] - public bool? 
Label; - } - - [Fact] - public void NullableBooleanLabelPipeline() - { - var data = new NullableBooleanLabelData[2]; - data[0] = new NullableBooleanLabelData - { - Features = new float[] { 0.0f, 1.0f }, - Label = null - }; - data[1] = new NullableBooleanLabelData - { - Features = new float[] { 1.0f, 0.0f }, - Label = false - }; - var pipeline = new Legacy.LearningPipeline(); - pipeline.Add(CollectionDataSource.Create(data)); - pipeline.Add(new FastForestBinaryClassifier()); - var model = pipeline.Train(); - } - [Fact] public void AppendPipeline() { diff --git a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj index 3494d971a8..4df5a6d039 100644 --- a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj +++ b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj @@ -1,4 +1,4 @@ - + diff --git a/test/Microsoft.ML.Tests/OnnxTests.cs b/test/Microsoft.ML.Tests/OnnxTests.cs index 0240e9e32a..a9fbb6417a 100644 --- a/test/Microsoft.ML.Tests/OnnxTests.cs +++ b/test/Microsoft.ML.Tests/OnnxTests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
@@ -9,6 +9,7 @@ using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.RunTests; +using System; using System.IO; using System.Text.RegularExpressions; using Xunit; @@ -27,7 +28,7 @@ public class BreastCancerData public float Label; public float F1; - public DvText F2; + public ReadOnlyMemory F2; } public class BreastCancerDataAllColumns @@ -41,7 +42,7 @@ public class BreastCancerDataAllColumns public class BreastCancerPrediction { [ColumnName("PredictedLabel")] - public DvBool Cancerous; + public bool Cancerous; } public class BreastCancerMCPrediction diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs index eec9a8a430..0759d820f4 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs @@ -1,7 +1,8 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.RunTests; using Xunit; @@ -41,11 +42,11 @@ void New_Visibility() Assert.True(cursor.Schema.TryGetColumnIndex("Features_TransformedText", out int transformedTextColumn)); Assert.True(cursor.Schema.TryGetColumnIndex("Features", out int featureColumn)); - var originalTextGettter = cursor.GetGetter(textColumn); - var transformedTextGettter = cursor.GetGetter>(transformedTextColumn); + var originalTextGettter = cursor.GetGetter>(textColumn); + var transformedTextGettter = cursor.GetGetter>>(transformedTextColumn); var featureGettter = cursor.GetGetter>(featureColumn); - DvText text = default; - VBuffer transformedText = default; + ReadOnlyMemory text = default; + VBuffer> transformedText = default; VBuffer features = default; while (cursor.MoveNext()) { diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Visibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Visibility.cs index aee89c3e7f..fd9c92b282 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Visibility.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Visibility.cs @@ -1,7 +1,8 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.RunTests; using Xunit; @@ -42,11 +43,11 @@ void Visibility() Assert.True(cursor.Schema.TryGetColumnIndex("Features_TransformedText", out int transformedTextColumn)); Assert.True(cursor.Schema.TryGetColumnIndex("Features", out int featureColumn)); - var originalTextGettter = cursor.GetGetter(textColumn); - var transformedTextGettter = cursor.GetGetter>(transformedTextColumn); + var originalTextGettter = cursor.GetGetter>(textColumn); + var transformedTextGettter = cursor.GetGetter>>(transformedTextColumn); var featureGettter = cursor.GetGetter>(featureColumn); - DvText text = default; - VBuffer transformedText = default; + ReadOnlyMemory text = default; + VBuffer> transformedText = default; VBuffer features = default; while (cursor.MoveNext()) { diff --git a/test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs index aeaf0216cd..f841439322 100644 --- a/test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. using Microsoft.ML.Runtime.Api; -using Microsoft.ML.Runtime.RunTests; using Microsoft.ML.TestFramework; using Xunit; using Xunit.Abstractions; diff --git a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/MultithreadedPrediction.cs b/test/Microsoft.ML.Tests/Scenarios/PipelineApi/MultithreadedPrediction.cs index a239faa2c0..77cdc9cc3f 100644 --- a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/MultithreadedPrediction.cs +++ b/test/Microsoft.ML.Tests/Scenarios/PipelineApi/MultithreadedPrediction.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
// See the LICENSE file in the project root for more information. diff --git a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/SimpleTrainAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/PipelineApi/SimpleTrainAndPredict.cs index 5c6e5a2347..f3e0a2f70e 100644 --- a/test/Microsoft.ML.Tests/Scenarios/PipelineApi/SimpleTrainAndPredict.cs +++ b/test/Microsoft.ML.Tests/Scenarios/PipelineApi/SimpleTrainAndPredict.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. diff --git a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs index 3e33fcea5a..e0517bbdd1 100644 --- a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs @@ -178,13 +178,13 @@ public void CrossValidateSentimentModelTest() var sentiments = GetTestData(); var predictions = cv.PredictorModels[0].Predict(sentiments); Assert.Equal(2, predictions.Count()); - Assert.True(predictions.ElementAt(0).Sentiment.IsTrue); - Assert.True(predictions.ElementAt(1).Sentiment.IsTrue); + Assert.True(predictions.ElementAt(0).Sentiment); + Assert.True(predictions.ElementAt(1).Sentiment); predictions = cv.PredictorModels[1].Predict(sentiments); Assert.Equal(2, predictions.Count()); - Assert.True(predictions.ElementAt(0).Sentiment.IsTrue); - Assert.True(predictions.ElementAt(1).Sentiment.IsTrue); + Assert.True(predictions.ElementAt(0).Sentiment); + Assert.True(predictions.ElementAt(1).Sentiment); } private void ValidateBinaryMetricsSymSGD(BinaryClassificationMetrics metrics) @@ -438,8 +438,8 @@ private void ValidateExamples(PredictionModel model) @@ -459,8 +459,8 @@ private void ValidateExamplesSymSGD(PredictionModel); + var names1 = 
default(VBuffer>); var type1 = result.Schema.GetColumnType(termIndex); int size = type1.ItemType.IsKey ? type1.ItemType.KeyCount : -1; result.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, termIndex, ref names1); diff --git a/test/Microsoft.ML.Tests/TextLoaderTests.cs b/test/Microsoft.ML.Tests/TextLoaderTests.cs index 39a83d0f61..c3d73aba79 100644 --- a/test/Microsoft.ML.Tests/TextLoaderTests.cs +++ b/test/Microsoft.ML.Tests/TextLoaderTests.cs @@ -2,18 +2,134 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using Microsoft.ML; -using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.RunTests; using Microsoft.ML.TestFramework; using System; +using System.IO; using Xunit; using Xunit.Abstractions; namespace Microsoft.ML.EntryPoints.Tests { + public sealed class TextLoaderTestPipe : TestDataPipeBase + { + public TextLoaderTestPipe(ITestOutputHelper output) + : base(output) + { + + } + + [Fact] + public void TestTextLoaderDataTypes() + { + string pathData = DeleteOutputPath("SavePipe", "TextInput.txt"); + File.WriteAllLines(pathData, new string[] { + string.Format("{0},{1},{2},{3}", sbyte.MinValue, short.MinValue, int.MinValue, long.MinValue), + string.Format("{0},{1},{2},{3}", sbyte.MaxValue, short.MaxValue, int.MaxValue, long.MaxValue), + "\"\",\"\",\"\",\"\"", + }); + + var data = TestCore(pathData, true, + new[] { + "loader=Text{col=DvInt1:I1:0 col=DvInt2:I2:1 col=DvInt4:I4:2 col=DvInt8:I8:3 sep=comma}", + }, logCurs: true); + + using (var cursor = data.GetRowCursor((a => true))) + { + var col1 = cursor.GetGetter(0); + var col2 = cursor.GetGetter(1); + var col3 = cursor.GetGetter(2); + var col4 = cursor.GetGetter(3); + + Assert.True(cursor.MoveNext()); + + sbyte[] sByteTargets = new sbyte[] { sbyte.MinValue, sbyte.MaxValue, default}; + short[] shortTargets = new short[] { 
short.MinValue, short.MaxValue, default }; + int[] intTargets = new int[] { int.MinValue, int.MaxValue, default }; + long[] longTargets = new long[] { long.MinValue, long.MaxValue, default }; + + int i = 0; + for (; i < sByteTargets.Length; i++) + { + sbyte sbyteValue = -1; + col1(ref sbyteValue); + Assert.Equal(sByteTargets[i], sbyteValue); + + short shortValue = -1; + col2(ref shortValue); + Assert.Equal(shortTargets[i], shortValue); + + int intValue = -1; + col3(ref intValue); + Assert.Equal(intTargets[i], intValue); + + long longValue = -1; + col4(ref longValue); + Assert.Equal(longTargets[i], longValue); + + if (i < sByteTargets.Length - 1) + Assert.True(cursor.MoveNext()); + else + Assert.False(cursor.MoveNext()); + } + + Assert.Equal(i, sByteTargets.Length); + } + } + + [Fact] + public void TestTextLoaderInvalidLongMin() + { + string pathData = DeleteOutputPath("SavePipe", "TextInput.txt"); + File.WriteAllLines(pathData, new string[] { + "-9223372036854775809" + + }); + + try + { + var data = TestCore(pathData, true, + new[] { + "loader=Text{col=DvInt8:I8:0 sep=comma}", + }, logCurs: true); + } + catch(Exception ex) + { + Assert.Equal("Value could not be parsed from text to long.", ex.Message); + return; + } + + Assert.True(false, "Test failed."); + } + + [Fact] + public void TestTextLoaderInvalidLongMax() + { + string pathData = DeleteOutputPath("SavePipe", "TextInput.txt"); + File.WriteAllLines(pathData, new string[] { + "9223372036854775808" + }); + + try + { + var data = TestCore(pathData, true, + new[] { + "loader=Text{col=DvInt8:I8:0 sep=comma}", + }, logCurs: true); + } + catch (Exception ex) + { + Assert.Equal("Value could not be parsed from text to long.", ex.Message); + return; + } + + Assert.True(false, "Test failed."); + } + } + public class TextLoaderTests : BaseTestClass { public TextLoaderTests(ITestOutputHelper output) @@ -74,7 +190,7 @@ public void CanSuccessfullyRetrieveQuotedData() using (var cursor = data.GetRowCursor((a => true))) { var 
IDGetter = cursor.GetGetter(0); - var TextGetter = cursor.GetGetter(1); + var TextGetter = cursor.GetGetter>(1); Assert.True(cursor.MoveNext()); @@ -82,7 +198,7 @@ public void CanSuccessfullyRetrieveQuotedData() IDGetter(ref ID); Assert.Equal(1, ID); - DvText Text = new DvText(); + ReadOnlyMemory Text = new ReadOnlyMemory(); TextGetter(ref Text); Assert.Equal("This text contains comma, within quotes.", Text.ToString()); @@ -92,7 +208,7 @@ public void CanSuccessfullyRetrieveQuotedData() IDGetter(ref ID); Assert.Equal(2, ID); - Text = new DvText(); + Text = new ReadOnlyMemory(); TextGetter(ref Text); Assert.Equal("This text contains extra punctuations and special characters.;*<>?!@#$%^&*()_+=-{}|[]:;'", Text.ToString()); @@ -102,7 +218,7 @@ public void CanSuccessfullyRetrieveQuotedData() IDGetter(ref ID); Assert.Equal(3, ID); - Text = new DvText(); + Text = new ReadOnlyMemory(); TextGetter(ref Text); Assert.Equal("This text has no quotes", Text.ToString()); @@ -197,7 +313,7 @@ public void CanSuccessfullyTrimSpaces() using (var cursor = data.GetRowCursor((a => true))) { var IDGetter = cursor.GetGetter(0); - var TextGetter = cursor.GetGetter(1); + var TextGetter = cursor.GetGetter>(1); Assert.True(cursor.MoveNext()); @@ -205,7 +321,7 @@ public void CanSuccessfullyTrimSpaces() IDGetter(ref ID); Assert.Equal(1, ID); - DvText Text = new DvText(); + ReadOnlyMemory Text = new ReadOnlyMemory(); TextGetter(ref Text); Assert.Equal("There is a space at the end", Text.ToString()); @@ -215,7 +331,7 @@ public void CanSuccessfullyTrimSpaces() IDGetter(ref ID); Assert.Equal(2, ID); - Text = new DvText(); + Text = new ReadOnlyMemory(); TextGetter(ref Text); Assert.Equal("There is no space at the end", Text.ToString()); diff --git a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs index 89e95ec08e..b995f7c984 100644 --- a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs +++ 
b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs @@ -8,6 +8,7 @@ using Microsoft.ML.Runtime.Model; using Microsoft.ML.Runtime.RunTests; using Microsoft.ML.Runtime.Tools; +using System; using System.IO; using System.Linq; using Xunit; @@ -147,8 +148,8 @@ private void ValidateBinMetadata(IDataView result) Assert.True(result.Schema.TryGetColumnIndex("CatF", out int colH)); var types = result.Schema.GetMetadataTypes(colA); Assert.Equal(types.Select(x => x.Key), new string[1] { MetadataUtils.Kinds.SlotNames }); - VBuffer slots = default; - DvBool normalized = default; + VBuffer> slots = default; + bool normalized = default; result.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, colA, ref slots); Assert.True(slots.Length == 6); Assert.Equal(slots.Items().Select(x => x.Value.ToString()), new string[6] { "[0].Bit2", "[0].Bit1", "[0].Bit0", "[1].Bit2", "[1].Bit1", "[1].Bit0" }); @@ -159,7 +160,7 @@ private void ValidateBinMetadata(IDataView result) Assert.True(slots.Length == 2); Assert.Equal(slots.Items().Select(x => x.Value.ToString()), new string[2] { "Bit1", "Bit0" }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colB, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); types = result.Schema.GetMetadataTypes(colC); Assert.Equal(types.Select(x => x.Key), new string[1] { MetadataUtils.Kinds.SlotNames }); @@ -174,7 +175,7 @@ private void ValidateBinMetadata(IDataView result) Assert.True(slots.Length == 3); Assert.Equal(slots.Items().Select(x => x.Value.ToString()), new string[3] { "Bit2", "Bit1", "Bit0" }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colD, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); types = result.Schema.GetMetadataTypes(colE); @@ -189,7 +190,7 @@ private void ValidateBinMetadata(IDataView result) Assert.True(slots.Length == 8); Assert.Equal(slots.Items().Select(x => x.Value.ToString()), new string[8] { "[0].Bit3", "[0].Bit2", "[0].Bit1", "[0].Bit0", 
"[1].Bit3", "[1].Bit2", "[1].Bit1", "[1].Bit0" }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colF, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); types = result.Schema.GetMetadataTypes(colG); Assert.Equal(types.Select(x => x.Key), new string[1] { MetadataUtils.Kinds.SlotNames }); @@ -204,7 +205,7 @@ private void ValidateBinMetadata(IDataView result) Assert.True(slots.Length == 3); Assert.Equal(slots.Items().Select(x => x.Value.ToString()), new string[3] { "Bit2", "Bit1", "Bit0" }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colH, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); } private void ValidateBagMetadata(IDataView result) @@ -219,9 +220,9 @@ private void ValidateBagMetadata(IDataView result) Assert.True(result.Schema.TryGetColumnIndex("CatF", out int colH)); var types = result.Schema.GetMetadataTypes(colA); Assert.Equal(types.Select(x => x.Key), new string[1] { MetadataUtils.Kinds.SlotNames }); - VBuffer slots = default; - VBuffer slotRanges = default; - DvBool normalized = default; + VBuffer> slots = default; + VBuffer slotRanges = default; + bool normalized = default; result.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, colA, ref slots); Assert.True(slots.Length == 2); Assert.Equal(slots.Items().Select(x => x.Value.ToString()), new string[2] { "A", "B" }); @@ -232,7 +233,7 @@ private void ValidateBagMetadata(IDataView result) Assert.True(slots.Length == 1); Assert.Equal(slots.Items().Select(x => x.Value.ToString()), new string[1] { "C" }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colB, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); types = result.Schema.GetMetadataTypes(colC); Assert.Equal(types.Select(x => x.Key), new string[1] { MetadataUtils.Kinds.SlotNames }); @@ -247,7 +248,7 @@ private void ValidateBagMetadata(IDataView result) Assert.True(slots.Length == 2); Assert.Equal(slots.Items().Select(x => 
x.Value.ToString()), new string[2] { "6", "1" }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colD, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); types = result.Schema.GetMetadataTypes(colE); @@ -259,7 +260,7 @@ private void ValidateBagMetadata(IDataView result) Assert.True(slotRanges.Length == 4); Assert.Equal(slotRanges.Items().Select(x => x.Value.ToString()), new string[4] { "0", "5", "6", "11" }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colE, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); types = result.Schema.GetMetadataTypes(colF); Assert.Equal(types.Select(x => x.Key), new string[3] { MetadataUtils.Kinds.SlotNames, MetadataUtils.Kinds.CategoricalSlotRanges, MetadataUtils.Kinds.IsNormalized }); @@ -270,7 +271,7 @@ private void ValidateBagMetadata(IDataView result) Assert.True(slotRanges.Length == 2); Assert.Equal(slotRanges.Items().Select(x => x.Value.ToString()), new string[2] { "0", "1" }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colF, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); types = result.Schema.GetMetadataTypes(colG); Assert.Equal(types.Select(x => x.Key), new string[3] { MetadataUtils.Kinds.SlotNames, MetadataUtils.Kinds.CategoricalSlotRanges, MetadataUtils.Kinds.IsNormalized }); diff --git a/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs b/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs index 5a39ef268b..56036bbc02 100644 --- a/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs +++ b/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs @@ -8,6 +8,7 @@ using Microsoft.ML.Runtime.Model; using Microsoft.ML.Runtime.RunTests; using Microsoft.ML.Runtime.Tools; +using System; using System.IO; using System.Linq; using Xunit; @@ -124,8 +125,8 @@ private void ValidateMetadata(IDataView result) 
Assert.True(result.Schema.TryGetColumnIndex("CatD", out int colD)); var types = result.Schema.GetMetadataTypes(colA); Assert.Equal(types.Select(x => x.Key), new string[1] { MetadataUtils.Kinds.SlotNames }); - VBuffer slots = default; - DvBool normalized = default; + VBuffer> slots = default; + bool normalized = default; result.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, colA, ref slots); Assert.True(slots.Length == 6); Assert.Equal(slots.Items().Select(x => x.Value.ToString()), new string[6] { "[0].Bit2", "[0].Bit1", "[0].Bit0", "[1].Bit2", "[1].Bit1", "[1].Bit0" }); @@ -136,7 +137,7 @@ private void ValidateMetadata(IDataView result) Assert.True(slots.Length == 2); Assert.Equal(slots.Items().Select(x => x.Value.ToString()), new string[2] { "Bit1", "Bit0" }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colB, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); types = result.Schema.GetMetadataTypes(colC); Assert.Equal(types.Select(x => x.Key), new string[0]); @@ -144,7 +145,7 @@ private void ValidateMetadata(IDataView result) types = result.Schema.GetMetadataTypes(colD); Assert.Equal(types.Select(x => x.Key), new string[1] { MetadataUtils.Kinds.IsNormalized }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colD, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); } [Fact] diff --git a/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs b/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs index 1570840f84..faf8b47e79 100644 --- a/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs @@ -8,6 +8,7 @@ using Microsoft.ML.Runtime.Model; using Microsoft.ML.Runtime.RunTests; using Microsoft.ML.Runtime.Tools; +using System; using System.IO; using System.Linq; using Xunit; @@ -147,9 +148,9 @@ private void ValidateMetadata(IDataView result) 
Assert.True(result.Schema.TryGetColumnIndex("CatF", out int colH)); var types = result.Schema.GetMetadataTypes(colA); Assert.Equal(types.Select(x => x.Key), new string[1] { MetadataUtils.Kinds.SlotNames }); - VBuffer slots = default; - VBuffer slotRanges = default; - DvBool normalized = default; + VBuffer> slots = default; + VBuffer slotRanges = default; + bool normalized = default; result.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, colA, ref slots); Assert.True(slots.Length == 2); Assert.Equal(slots.Items().Select(x => x.Value.ToString()), new string[2] { "A", "B" }); @@ -161,9 +162,9 @@ private void ValidateMetadata(IDataView result) Assert.Equal(slots.Items().Select(x => x.Value.ToString()), new string[1] { "C" }); result.Schema.GetMetadata(MetadataUtils.Kinds.CategoricalSlotRanges, colB, ref slotRanges); Assert.True(slotRanges.Length == 2); - Assert.Equal(slotRanges.Items().Select(x => x.Value.RawValue), new int[2] { 0, 0 }); + Assert.Equal(slotRanges.Items().Select(x => x.Value), new int[2] { 0, 0 }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colB, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); types = result.Schema.GetMetadataTypes(colC); Assert.Equal(types.Select(x => x.Key), new string[3] { MetadataUtils.Kinds.SlotNames, MetadataUtils.Kinds.CategoricalSlotRanges, MetadataUtils.Kinds.IsNormalized }); @@ -172,9 +173,9 @@ private void ValidateMetadata(IDataView result) Assert.Equal(slots.Items().Select(x => x.Value.ToString()), new string[4] { "[0].3", "[0].5", "[1].3", "[1].5" }); result.Schema.GetMetadata(MetadataUtils.Kinds.CategoricalSlotRanges, colC, ref slotRanges); Assert.True(slotRanges.Length == 4); - Assert.Equal(slotRanges.Items().Select(x => x.Value.RawValue), new int[4] { 0, 1, 2, 3 }); + Assert.Equal(slotRanges.Items().Select(x => x.Value), new int[4] { 0, 1, 2, 3 }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colC, ref normalized); - Assert.True(normalized.IsTrue); + 
Assert.True(normalized); types = result.Schema.GetMetadataTypes(colD); Assert.Equal(types.Select(x => x.Key), new string[2] { MetadataUtils.Kinds.SlotNames, MetadataUtils.Kinds.IsNormalized }); @@ -182,34 +183,34 @@ private void ValidateMetadata(IDataView result) Assert.True(slots.Length == 2); Assert.Equal(slots.Items().Select(x => x.Value.ToString()), new string[2] { "6", "1" }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colD, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); types = result.Schema.GetMetadataTypes(colE); Assert.Equal(types.Select(x => x.Key), new string[2] { MetadataUtils.Kinds.CategoricalSlotRanges, MetadataUtils.Kinds.IsNormalized }); result.Schema.GetMetadata(MetadataUtils.Kinds.CategoricalSlotRanges, colE, ref slotRanges); Assert.True(slotRanges.Length == 4); - Assert.Equal(slotRanges.Items().Select(x => x.Value.RawValue), new int[4] { 0, 5, 6, 11 }); + Assert.Equal(slotRanges.Items().Select(x => x.Value), new int[4] { 0, 5, 6, 11 }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colE, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); types = result.Schema.GetMetadataTypes(colF); Assert.Equal(types.Select(x => x.Key), new string[1] { MetadataUtils.Kinds.IsNormalized }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colF, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); types = result.Schema.GetMetadataTypes(colG); Assert.Equal(types.Select(x => x.Key), new string[2] { MetadataUtils.Kinds.CategoricalSlotRanges, MetadataUtils.Kinds.IsNormalized }); result.Schema.GetMetadata(MetadataUtils.Kinds.CategoricalSlotRanges, colG, ref slotRanges); Assert.True(slotRanges.Length == 4); - Assert.Equal(slotRanges.Items().Select(x => x.Value.RawValue), new int[4] { 0, 5, 6, 11 }); + Assert.Equal(slotRanges.Items().Select(x => x.Value), new int[4] { 0, 5, 6, 11 }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colF, 
ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); types = result.Schema.GetMetadataTypes(colH); Assert.Equal(types.Select(x => x.Key), new string[1] { MetadataUtils.Kinds.IsNormalized }); result.Schema.GetMetadata(MetadataUtils.Kinds.IsNormalized, colF, ref normalized); - Assert.True(normalized.IsTrue); + Assert.True(normalized); } [Fact] diff --git a/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs b/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs index 969c5198b8..82dd6cf54a 100644 --- a/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs @@ -19,12 +19,11 @@ public class NAReplaceTests : TestDataPipeBase private class TestClass { public float A; - public string B; - public double C; + public double B; [VectorType(2)] - public float[] D; + public float[] C; [VectorType(2)] - public double[] E; + public double[] D; } public NAReplaceTests(ITestOutputHelper output) : base(output) @@ -35,20 +34,19 @@ public NAReplaceTests(ITestOutputHelper output) : base(output) public void NAReplaceWorkout() { var data = new[] { - new TestClass() { A = 1, B = "A", C = 3, D= new float[2]{ 1, 2 } , E = new double[2]{ 3,4} }, - new TestClass() { A = float.NaN, B = null, C = double.NaN, D= new float[2]{ float.NaN, float.NaN } , E = new double[2]{ double.NaN,double.NaN}}, - new TestClass() { A = float.NegativeInfinity, B = null, C = double.NegativeInfinity,D= new float[2]{ float.NegativeInfinity, float.NegativeInfinity } , E = new double[2]{ double.NegativeInfinity, double.NegativeInfinity}}, - new TestClass() { A = float.PositiveInfinity, B = null, C = double.PositiveInfinity,D= new float[2]{ float.PositiveInfinity, float.PositiveInfinity, } , E = new double[2]{ double.PositiveInfinity, double.PositiveInfinity}}, - new TestClass() { A = 2, B = "B", C = 1 ,D= new float[2]{ 3, 4 } , E = new double[2]{ 5,6}}, + new TestClass() { A = 1, B = 3, C= new float[2]{ 1, 2 } , D = new double[2]{ 
3,4} }, + new TestClass() { A = float.NaN, B = double.NaN, C= new float[2]{ float.NaN, float.NaN } , D = new double[2]{ double.NaN,double.NaN}}, + new TestClass() { A = float.NegativeInfinity, B = double.NegativeInfinity,C= new float[2]{ float.NegativeInfinity, float.NegativeInfinity } , D = new double[2]{ double.NegativeInfinity, double.NegativeInfinity}}, + new TestClass() { A = float.PositiveInfinity, B = double.PositiveInfinity,C= new float[2]{ float.PositiveInfinity, float.PositiveInfinity, } , D = new double[2]{ double.PositiveInfinity, double.PositiveInfinity}}, + new TestClass() { A = 2, B = 1 ,C= new float[2]{ 3, 4 } , D = new double[2]{ 5,6}}, }; var dataView = ComponentCreation.CreateDataView(Env, data); var pipe = new NAReplaceEstimator(Env, new NAReplaceTransform.ColumnInfo("A", "NAA", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean), - new NAReplaceTransform.ColumnInfo("B", "NAB", NAReplaceTransform.ColumnInfo.ReplacementMode.DefaultValue), + new NAReplaceTransform.ColumnInfo("B", "NAB", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean), new NAReplaceTransform.ColumnInfo("C", "NAC", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean), - new NAReplaceTransform.ColumnInfo("D", "NAD", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean), - new NAReplaceTransform.ColumnInfo("E", "NAE", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean)); + new NAReplaceTransform.ColumnInfo("D", "NAD", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean)); TestEstimatorCore(pipe, dataView); Done(); } @@ -58,26 +56,22 @@ public void NAReplaceStatic() { string dataPath = GetDataPath("breast-cancer.txt"); var reader = TextLoader.CreateReader(Env, ctx => ( - ScalarString: ctx.LoadText(1), ScalarFloat: ctx.LoadFloat(1), ScalarDouble: ctx.LoadDouble(1), - VectorString: ctx.LoadText(1, 4), VectorFloat: ctx.LoadFloat(1, 4), VectorDoulbe: ctx.LoadDouble(1, 4) )); var data = reader.Read(new MultiFileSource(dataPath)); - var wrongCollection = new[] { new TestClass() { A = 1, B = 
"A", C = 3, D = new float[2] { 1, 2 }, E = new double[2] { 3, 4 } } }; + var wrongCollection = new[] { new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } } }; var invalidData = ComponentCreation.CreateDataView(Env, wrongCollection); var est = data.MakeNewEstimator(). Append(row => ( - A: row.ScalarString.ReplaceWithMissingValues(), - B: row.ScalarFloat.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Maximum), - C: row.ScalarDouble.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Mean), - D: row.VectorString.ReplaceWithMissingValues(), - E: row.VectorFloat.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Mean), - F: row.VectorDoulbe.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Minimum) + A: row.ScalarFloat.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Maximum), + B: row.ScalarDouble.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Mean), + C: row.VectorFloat.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Mean), + D: row.VectorDoulbe.ReplaceWithMissingValues(NAReplaceTransform.ColumnInfo.ReplacementMode.Minimum) )); TestEstimatorCore(est.AsDynamic, data.AsDynamic, invalidInput: invalidData); @@ -86,7 +80,7 @@ public void NAReplaceStatic() { var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true }); IDataView savedData = TakeFilter.Create(Env, est.Fit(data).Transform(data).AsDynamic, 4); - savedData = new ChooseColumnsTransform(Env, savedData, "A", "B", "C", "D", "E"); + savedData = new ChooseColumnsTransform(Env, savedData, "A", "B", "C", "D"); using (var fs = File.Create(outputPath)) DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true); } @@ -105,20 +99,19 @@ public void TestCommandLine() public void TestOldSavingAndLoading() { var data = new[] { - new TestClass() { A = 1, B = "A", C = 3, D= new float[2]{ 1, 2 } , E = new double[2]{ 
3,4} }, - new TestClass() { A = float.NaN, B = null, C = double.NaN, D= new float[2]{ float.NaN, float.NaN } , E = new double[2]{ double.NaN,double.NaN}}, - new TestClass() { A = float.NegativeInfinity, B = null, C = double.NegativeInfinity,D= new float[2]{ float.NegativeInfinity, float.NegativeInfinity } , E = new double[2]{ double.NegativeInfinity, double.NegativeInfinity}}, - new TestClass() { A = float.PositiveInfinity, B = null, C = double.PositiveInfinity,D= new float[2]{ float.PositiveInfinity, float.PositiveInfinity, } , E = new double[2]{ double.PositiveInfinity, double.PositiveInfinity}}, - new TestClass() { A = 2, B = "B", C = 1 ,D= new float[2]{ 3, 4 } , E = new double[2]{ 5,6}}, + new TestClass() { A = 1, B = 3, C= new float[2]{ 1, 2 } , D = new double[2]{ 3,4} }, + new TestClass() { A = float.NaN, B = double.NaN, C= new float[2]{ float.NaN, float.NaN } , D = new double[2]{ double.NaN,double.NaN}}, + new TestClass() { A = float.NegativeInfinity, B = double.NegativeInfinity,C= new float[2]{ float.NegativeInfinity, float.NegativeInfinity } , D = new double[2]{ double.NegativeInfinity, double.NegativeInfinity}}, + new TestClass() { A = float.PositiveInfinity, B = double.PositiveInfinity,C= new float[2]{ float.PositiveInfinity, float.PositiveInfinity, } , D = new double[2]{ double.PositiveInfinity, double.PositiveInfinity}}, + new TestClass() { A = 2, B = 1 ,C= new float[2]{ 3, 4 } , D = new double[2]{ 5,6}}, }; var dataView = ComponentCreation.CreateDataView(Env, data); var pipe = new NAReplaceEstimator(Env, new NAReplaceTransform.ColumnInfo("A", "NAA", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean), - new NAReplaceTransform.ColumnInfo("B", "NAB", NAReplaceTransform.ColumnInfo.ReplacementMode.DefaultValue), + new NAReplaceTransform.ColumnInfo("B", "NAB", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean), new NAReplaceTransform.ColumnInfo("C", "NAC", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean), - new NAReplaceTransform.ColumnInfo("D", "NAD", 
NAReplaceTransform.ColumnInfo.ReplacementMode.Mean), - new NAReplaceTransform.ColumnInfo("E", "NAE", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean)); + new NAReplaceTransform.ColumnInfo("D", "NAD", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean)); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result);