diff --git a/build/Dependencies.props b/build/Dependencies.props
index 24f3153e4c..c1334615db 100644
--- a/build/Dependencies.props
+++ b/build/Dependencies.props
@@ -13,7 +13,7 @@
0.11.1
1.10.0
1.5.0
-
+ 4.5.1
2.9.0
4.5.0
1.2.0
diff --git a/pkg/Microsoft.ML/Microsoft.ML.nupkgproj b/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
index 7757e264b6..7aed922027 100644
--- a/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
+++ b/pkg/Microsoft.ML/Microsoft.ML.nupkgproj
@@ -12,6 +12,7 @@
+
diff --git a/src/Microsoft.ML.Api/ApiUtils.cs b/src/Microsoft.ML.Api/ApiUtils.cs
index 96e821f16e..760ed1e768 100644
--- a/src/Microsoft.ML.Api/ApiUtils.cs
+++ b/src/Microsoft.ML.Api/ApiUtils.cs
@@ -19,11 +19,10 @@ private static OpCode GetAssignmentOpCode(Type t)
{
// REVIEW: This should be a Dictionary based solution.
// DvTypes, strings, arrays, all nullable types, VBuffers and UInt128.
- if (t == typeof(DvInt8) || t == typeof(DvInt4) || t == typeof(DvInt2) || t == typeof(DvInt1) ||
- t == typeof(DvBool) || t == typeof(DvText) || t == typeof(string) || t.IsArray ||
+ if (t == typeof(ReadOnlyMemory) || t == typeof(string) || t.IsArray ||
(t.IsGenericType && t.GetGenericTypeDefinition() == typeof(VBuffer<>)) ||
(t.IsGenericType && t.GetGenericTypeDefinition() == typeof(Nullable<>)) ||
- t == typeof(DvDateTime) || t == typeof(DvDateTimeZone) || t == typeof(DvTimeSpan) || t == typeof(UInt128))
+ t == typeof(DateTime) || t == typeof(DateTimeOffset) || t == typeof(TimeSpan) || t == typeof(UInt128))
{
return OpCodes.Stobj;
}
diff --git a/src/Microsoft.ML.Api/DataViewConstructionUtils.cs b/src/Microsoft.ML.Api/DataViewConstructionUtils.cs
index 6962080a7e..ca7ed9c07e 100644
--- a/src/Microsoft.ML.Api/DataViewConstructionUtils.cs
+++ b/src/Microsoft.ML.Api/DataViewConstructionUtils.cs
@@ -125,61 +125,11 @@ private Delegate CreateGetter(int index)
if (outputType.IsArray)
{
Ch.Assert(colType.IsVector);
- // String[] -> VBuffer
+ // String[] -> ReadOnlyMemory<char>
if (outputType.GetElementType() == typeof(string))
{
Ch.Assert(colType.ItemType.IsText);
- return CreateConvertingArrayGetterDelegate(index, x => x == null ? DvText.NA : new DvText(x));
- }
- else if (outputType.GetElementType() == typeof(int))
- {
- Ch.Assert(colType.ItemType == NumberType.I4);
- return CreateConvertingArrayGetterDelegate(index, x => x);
- }
- else if (outputType.GetElementType() == typeof(int?))
- {
- Ch.Assert(colType.ItemType == NumberType.I4);
- return CreateConvertingArrayGetterDelegate(index, x => x ?? DvInt4.NA);
- }
- else if (outputType.GetElementType() == typeof(long))
- {
- Ch.Assert(colType.ItemType == NumberType.I8);
- return CreateConvertingArrayGetterDelegate(index, x => x);
- }
- else if (outputType.GetElementType() == typeof(long?))
- {
- Ch.Assert(colType.ItemType == NumberType.I8);
- return CreateConvertingArrayGetterDelegate(index, x => x ?? DvInt8.NA);
- }
- else if (outputType.GetElementType() == typeof(short))
- {
- Ch.Assert(colType.ItemType == NumberType.I2);
- return CreateConvertingArrayGetterDelegate(index, x => x);
- }
- else if (outputType.GetElementType() == typeof(short?))
- {
- Ch.Assert(colType.ItemType == NumberType.I2);
- return CreateConvertingArrayGetterDelegate(index, x => x ?? DvInt2.NA);
- }
- else if (outputType.GetElementType() == typeof(sbyte))
- {
- Ch.Assert(colType.ItemType == NumberType.I1);
- return CreateConvertingArrayGetterDelegate(index, x => x);
- }
- else if (outputType.GetElementType() == typeof(sbyte?))
- {
- Ch.Assert(colType.ItemType == NumberType.I1);
- return CreateConvertingArrayGetterDelegate(index, x => x ?? DvInt1.NA);
- }
- else if (outputType.GetElementType() == typeof(bool))
- {
- Ch.Assert(colType.ItemType.IsBool);
- return CreateConvertingArrayGetterDelegate(index, x => x);
- }
- else if (outputType.GetElementType() == typeof(bool?))
- {
- Ch.Assert(colType.ItemType.IsBool);
- return CreateConvertingArrayGetterDelegate(index, x => x ?? DvBool.NA);
+ return CreateConvertingArrayGetterDelegate>(index, x => x != null ? x.AsMemory() : ReadOnlyMemory.Empty);
}
// T[] -> VBuffer
@@ -193,7 +143,7 @@ private Delegate CreateGetter(int index)
else if (colType.IsVector)
{
// VBuffer -> VBuffer
- // REVIEW: Do we care about accomodating VBuffer -> VBuffer?
+ // REVIEW: Do we care about accommodating VBuffer<string> -> ReadOnlyMemory<char>?
Ch.Assert(outputType.IsGenericType);
Ch.Assert(outputType.GetGenericTypeDefinition() == typeof(VBuffer<>));
Ch.Assert(outputType.GetGenericArguments()[0] == colType.ItemType.RawType);
@@ -204,70 +154,11 @@ private Delegate CreateGetter(int index)
{
if (outputType == typeof(string))
{
- // String -> DvText
+ // String -> ReadOnlyMemory<char>
Ch.Assert(colType.IsText);
- return CreateConvertingGetterDelegate(index, x => x == null ? DvText.NA : new DvText(x));
- }
- else if (outputType == typeof(bool))
- {
- // Bool -> DvBool
- Ch.Assert(colType.IsBool);
- return CreateConvertingGetterDelegate(index, x => x);
- }
- else if (outputType == typeof(bool?))
- {
- // Bool? -> DvBool
- Ch.Assert(colType.IsBool);
- return CreateConvertingGetterDelegate(index, x => x ?? DvBool.NA);
- }
- else if (outputType == typeof(int))
- {
- // int -> DvInt4
- Ch.Assert(colType == NumberType.I4);
- return CreateConvertingGetterDelegate(index, x => x);
- }
- else if (outputType == typeof(int?))
- {
- // int? -> DvInt4
- Ch.Assert(colType == NumberType.I4);
- return CreateConvertingGetterDelegate(index, x => x ?? DvInt4.NA);
- }
- else if (outputType == typeof(short))
- {
- // short -> DvInt2
- Ch.Assert(colType == NumberType.I2);
- return CreateConvertingGetterDelegate(index, x => x);
- }
- else if (outputType == typeof(short?))
- {
- // short? -> DvInt2
- Ch.Assert(colType == NumberType.I2);
- return CreateConvertingGetterDelegate(index, x => x ?? DvInt2.NA);
- }
- else if (outputType == typeof(long))
- {
- // long -> DvInt8
- Ch.Assert(colType == NumberType.I8);
- return CreateConvertingGetterDelegate(index, x => x);
- }
- else if (outputType == typeof(long?))
- {
- // long? -> DvInt8
- Ch.Assert(colType == NumberType.I8);
- return CreateConvertingGetterDelegate(index, x => x ?? DvInt8.NA);
- }
- else if (outputType == typeof(sbyte))
- {
- // sbyte -> DvInt1
- Ch.Assert(colType == NumberType.I1);
- return CreateConvertingGetterDelegate(index, x => x);
- }
- else if (outputType == typeof(sbyte?))
- {
- // sbyte? -> DvInt1
- Ch.Assert(colType == NumberType.I1);
- return CreateConvertingGetterDelegate(index, x => x ?? DvInt1.NA);
+ return CreateConvertingGetterDelegate>(index, x => x != null ? x.AsMemory() : ReadOnlyMemory.Empty);
}
+
// T -> T
if (outputType.IsGenericType && outputType.GetGenericTypeDefinition() == typeof(Nullable<>))
Ch.Assert(colType.RawType == Nullable.GetUnderlyingType(outputType));
@@ -805,12 +696,12 @@ public override ValueGetter GetGetter()
var itemType = typeT.GetElementType();
var dstItemType = typeof(TDst).GetGenericArguments()[0];
- // String[] -> VBuffer
+ // String[] -> VBuffer<ReadOnlyMemory<char>>
if (itemType == typeof(string))
{
- Contracts.Check(dstItemType == typeof(DvText));
+ Contracts.Check(dstItemType == typeof(ReadOnlyMemory));
- ValueGetter> method = GetStringArray;
+ ValueGetter>> method = GetStringArray;
return method as ValueGetter;
}
@@ -825,7 +716,7 @@ public override ValueGetter GetGetter()
if (MetadataType.IsVector)
{
// VBuffer -> VBuffer
- // REVIEW: Do we care about accomodating VBuffer -> VBuffer?
+ // REVIEW: Do we care about accommodating VBuffer<string> -> VBuffer<ReadOnlyMemory<char>>?
Contracts.Assert(typeT.IsGenericType);
Contracts.Check(typeof(TDst).IsGenericType);
@@ -845,9 +736,9 @@ public override ValueGetter GetGetter()
{
if (typeT == typeof(string))
{
- // String -> DvText
+ // String -> ReadOnlyMemory<char>
Contracts.Assert(MetadataType.IsText);
- ValueGetter m = GetString;
+ ValueGetter> m = GetString;
return m as ValueGetter;
}
// T -> T
@@ -861,14 +752,14 @@ public class TElement
{
}
- private void GetStringArray(ref VBuffer dst)
+ private void GetStringArray(ref VBuffer> dst)
{
var value = (string[])(object)Value;
var n = Utils.Size(value);
- dst = new VBuffer(n, Utils.Size(dst.Values) < n ? new DvText[n] : dst.Values, dst.Indices);
+ dst = new VBuffer>(n, Utils.Size(dst.Values) < n ? new ReadOnlyMemory[n] : dst.Values, dst.Indices);
for (int i = 0; i < n; i++)
- dst.Values[i] = new DvText(value[i]);
+ dst.Values[i] = value[i].AsMemory();
}
@@ -890,9 +781,9 @@ private ValueGetter> GetVBufferGetter()
return (ref VBuffer dst) => castValue.CopyTo(ref dst);
}
- private void GetString(ref DvText dst)
+ private void GetString(ref ReadOnlyMemory dst)
{
- dst = new DvText((string)(object)Value);
+ dst = ((string)(object)Value).AsMemory();
}
private void GetDirectValue(ref TDst dst)
diff --git a/src/Microsoft.ML.Api/TypedCursor.cs b/src/Microsoft.ML.Api/TypedCursor.cs
index 923aac5c81..50bb1cd575 100644
--- a/src/Microsoft.ML.Api/TypedCursor.cs
+++ b/src/Microsoft.ML.Api/TypedCursor.cs
@@ -276,61 +276,11 @@ private Action GenerateSetter(IRow input, int index, InternalSchemaDefinit
if (fieldType.IsArray)
{
Ch.Assert(colType.IsVector);
- // VBuffer -> String[]
+ // VBuffer<ReadOnlyMemory<char>> -> String[]
if (fieldType.GetElementType() == typeof(string))
{
Ch.Assert(colType.ItemType.IsText);
- return CreateConvertingVBufferSetter(input, index, poke, peek, x => x.ToString());
- }
- else if (fieldType.GetElementType() == typeof(bool))
- {
- Ch.Assert(colType.ItemType.IsBool);
- return CreateConvertingVBufferSetter(input, index, poke, peek, x => (bool)x);
- }
- else if (fieldType.GetElementType() == typeof(bool?))
- {
- Ch.Assert(colType.ItemType.IsBool);
- return CreateConvertingVBufferSetter(input, index, poke, peek, x => (bool?)x);
- }
- else if (fieldType.GetElementType() == typeof(int))
- {
- Ch.Assert(colType.ItemType == NumberType.I4);
- return CreateConvertingVBufferSetter(input, index, poke, peek, x => (int)x);
- }
- else if (fieldType.GetElementType() == typeof(int?))
- {
- Ch.Assert(colType.ItemType == NumberType.I4);
- return CreateConvertingVBufferSetter(input, index, poke, peek, x => (int?)x);
- }
- else if (fieldType.GetElementType() == typeof(short))
- {
- Ch.Assert(colType.ItemType == NumberType.I2);
- return CreateConvertingVBufferSetter(input, index, poke, peek, x => (short)x);
- }
- else if (fieldType.GetElementType() == typeof(short?))
- {
- Ch.Assert(colType.ItemType == NumberType.I2);
- return CreateConvertingVBufferSetter(input, index, poke, peek, x => (short?)x);
- }
- else if (fieldType.GetElementType() == typeof(long))
- {
- Ch.Assert(colType.ItemType == NumberType.I8);
- return CreateConvertingVBufferSetter(input, index, poke, peek, x => (long)x);
- }
- else if (fieldType.GetElementType() == typeof(long?))
- {
- Ch.Assert(colType.ItemType == NumberType.I8);
- return CreateConvertingVBufferSetter(input, index, poke, peek, x => (long?)x);
- }
- else if (fieldType.GetElementType() == typeof(sbyte))
- {
- Ch.Assert(colType.ItemType == NumberType.I1);
- return CreateConvertingVBufferSetter(input, index, poke, peek, x => (sbyte)x);
- }
- else if (fieldType.GetElementType() == typeof(sbyte?))
- {
- Ch.Assert(colType.ItemType == NumberType.I1);
- return CreateConvertingVBufferSetter(input, index, poke, peek, x => (sbyte?)x);
+ return CreateConvertingVBufferSetter, string>(input, index, poke, peek, x => x.ToString());
}
// VBuffer -> T[]
@@ -344,7 +294,7 @@ private Action GenerateSetter(IRow input, int index, InternalSchemaDefinit
else if (colType.IsVector)
{
// VBuffer -> VBuffer
- // REVIEW: Do we care about accomodating VBuffer -> VBuffer?
+ // REVIEW: Do we care about accommodating VBuffer<string> -> VBuffer<ReadOnlyMemory<char>>?
Ch.Assert(fieldType.IsGenericType);
Ch.Assert(fieldType.GetGenericTypeDefinition() == typeof(VBuffer<>));
Ch.Assert(fieldType.GetGenericArguments()[0] == colType.ItemType.RawType);
@@ -355,71 +305,12 @@ private Action GenerateSetter(IRow input, int index, InternalSchemaDefinit
{
if (fieldType == typeof(string))
{
- // DvText -> String
+ // ReadOnlyMemory<char> -> String
Ch.Assert(colType.IsText);
Ch.Assert(peek == null);
- return CreateConvertingActionSetter(input, index, poke, x => x.ToString());
- }
- else if (fieldType == typeof(bool))
- {
- Ch.Assert(colType.IsBool);
- Ch.Assert(peek == null);
- return CreateConvertingActionSetter(input, index, poke, x => (bool)x);
- }
- else if (fieldType == typeof(bool?))
- {
- Ch.Assert(colType.IsBool);
- Ch.Assert(peek == null);
- return CreateConvertingActionSetter(input, index, poke, x => (bool?)x);
- }
- else if (fieldType == typeof(int))
- {
- Ch.Assert(colType == NumberType.I4);
- Ch.Assert(peek == null);
- return CreateConvertingActionSetter(input, index, poke, x => (int)x);
- }
- else if (fieldType == typeof(int?))
- {
- Ch.Assert(colType == NumberType.I4);
- Ch.Assert(peek == null);
- return CreateConvertingActionSetter(input, index, poke, x => (int?)x);
- }
- else if (fieldType == typeof(short))
- {
- Ch.Assert(colType == NumberType.I2);
- Ch.Assert(peek == null);
- return CreateConvertingActionSetter(input, index, poke, x => (short)x);
- }
- else if (fieldType == typeof(short?))
- {
- Ch.Assert(colType == NumberType.I2);
- Ch.Assert(peek == null);
- return CreateConvertingActionSetter(input, index, poke, x => (short?)x);
- }
- else if (fieldType == typeof(long))
- {
- Ch.Assert(colType == NumberType.I8);
- Ch.Assert(peek == null);
- return CreateConvertingActionSetter(input, index, poke, x => (long)x);
- }
- else if (fieldType == typeof(long?))
- {
- Ch.Assert(colType == NumberType.I8);
- Ch.Assert(peek == null);
- return CreateConvertingActionSetter(input, index, poke, x => (long?)x);
- }
- else if (fieldType == typeof(sbyte))
- {
- Ch.Assert(colType == NumberType.I1);
- Ch.Assert(peek == null);
- return CreateConvertingActionSetter(input, index, poke, x => (sbyte)x);
- }
- else if (fieldType == typeof(sbyte?))
- {
- Ch.Assert(colType == NumberType.I1);
- Ch.Assert(peek == null);
- return CreateConvertingActionSetter(input, index, poke, x => (sbyte?)x);
+ return CreateConvertingActionSetter, string>(input, index, poke, x => x.ToString());
}
+
// T -> T
if (fieldType.IsGenericType && fieldType.GetGenericTypeDefinition() == typeof(Nullable<>))
Ch.Assert(colType.RawType == Nullable.GetUnderlyingType(fieldType));
diff --git a/src/Microsoft.ML.Core/CommandLine/CmdParser.cs b/src/Microsoft.ML.Core/CommandLine/CmdParser.cs
index de89ddc602..d5a204dd45 100644
--- a/src/Microsoft.ML.Core/CommandLine/CmdParser.cs
+++ b/src/Microsoft.ML.Core/CommandLine/CmdParser.cs
@@ -1,4 +1,4 @@
-//////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////
// Command Line Argument Parser
// ----------------------------
// Usage
diff --git a/src/Microsoft.ML.Core/Data/ColumnType.cs b/src/Microsoft.ML.Core/Data/ColumnType.cs
index 96764d68f1..69a6505c51 100644
--- a/src/Microsoft.ML.Core/Data/ColumnType.cs
+++ b/src/Microsoft.ML.Core/Data/ColumnType.cs
@@ -120,47 +120,38 @@ public bool IsBool
}
///
- /// Whether this type is the standard timespan type.
+ /// Whether this type is the standard <see cref="TimeSpan"/> type.
///
public bool IsTimeSpan
{
get
{
- if (!(this is TimeSpanType))
- return false;
- // TimeSpanType is a singleton.
- Contracts.Assert(this == TimeSpanType.Instance);
- return true;
+ Contracts.Assert((this == TimeSpanType.Instance) == (this is TimeSpanType));
+ return this is TimeSpanType;
}
}
///
- /// Whether this type is a DvDateTime.
+ /// Whether this type is a <see cref="DateTime"/>.
///
public bool IsDateTime
{
get
{
- if (!(this is DateTimeType))
- return false;
- // DateTimeType is a singleton.
- Contracts.Assert(this == DateTimeType.Instance);
- return true;
+ Contracts.Assert((this == DateTimeType.Instance) == (this is DateTimeType));
+ return this is DateTimeType;
}
}
///
- /// Whether this type is a DvDateTimeZone.
+ /// Whether this type is a <see cref="DateTimeOffset"/>.
///
public bool IsDateTimeZone
{
get
{
- if (!(this is DateTimeZoneType))
- return false;
- // DateTimeZoneType is a singleton.
- Contracts.Assert(this == DateTimeZoneType.Instance);
- return true;
+ Contracts.Assert((this == DateTimeOffsetType.Instance) == (this is DateTimeOffsetType));
+ return this is DateTimeOffsetType;
}
}
@@ -319,7 +310,7 @@ public static PrimitiveType FromKind(DataKind kind)
if (kind == DataKind.DT)
return DateTimeType.Instance;
if (kind == DataKind.DZ)
- return DateTimeZoneType.Instance;
+ return DateTimeOffsetType.Instance;
return NumberType.FromKind(kind);
}
}
@@ -341,7 +332,7 @@ public static TextType Instance
}
private TextType()
- : base(typeof(DvText), DataKind.TX)
+ : base(typeof(ReadOnlyMemory), DataKind.TX)
{
}
@@ -573,7 +564,7 @@ public static BoolType Instance
}
private BoolType()
- : base(typeof(DvBool), DataKind.BL)
+ : base(typeof(bool), DataKind.BL)
{
}
@@ -605,7 +596,7 @@ public static DateTimeType Instance
}
private DateTimeType()
- : base(typeof(DvDateTime), DataKind.DT)
+ : base(typeof(DateTime), DataKind.DT)
{
}
@@ -623,21 +614,21 @@ public override string ToString()
}
}
- public sealed class DateTimeZoneType : PrimitiveType
+ public sealed class DateTimeOffsetType : PrimitiveType
{
- private static volatile DateTimeZoneType _instance;
- public static DateTimeZoneType Instance
+ private static volatile DateTimeOffsetType _instance;
+ public static DateTimeOffsetType Instance
{
get
{
if (_instance == null)
- Interlocked.CompareExchange(ref _instance, new DateTimeZoneType(), null);
+ Interlocked.CompareExchange(ref _instance, new DateTimeOffsetType(), null);
return _instance;
}
}
- private DateTimeZoneType()
- : base(typeof(DvDateTimeZone), DataKind.DZ)
+ private DateTimeOffsetType()
+ : base(typeof(DateTimeOffset), DataKind.DZ)
{
}
@@ -645,7 +636,7 @@ public override bool Equals(ColumnType other)
{
if (other == this)
return true;
- Contracts.Assert(!(other is DateTimeZoneType));
+ Contracts.Assert(!(other is DateTimeOffsetType));
return false;
}
@@ -672,7 +663,7 @@ public static TimeSpanType Instance
}
private TimeSpanType()
- : base(typeof(DvTimeSpan), DataKind.TS)
+ : base(typeof(TimeSpan), DataKind.TS)
{
}
diff --git a/src/Microsoft.ML.Core/Data/DataKind.cs b/src/Microsoft.ML.Core/Data/DataKind.cs
index 0249745691..ad8d8fbfe0 100644
--- a/src/Microsoft.ML.Core/Data/DataKind.cs
+++ b/src/Microsoft.ML.Core/Data/DataKind.cs
@@ -55,7 +55,7 @@ public enum DataKind : byte
public static class DataKindExtensions
{
public const DataKind KindMin = DataKind.I1;
- public const DataKind KindLim = DataKind.UG + 1;
+ public const DataKind KindLim = DataKind.U16 + 1;
public const int KindCount = KindLim - KindMin;
///
@@ -141,19 +141,19 @@ public static Type ToType(this DataKind kind)
switch (kind)
{
case DataKind.I1:
- return typeof(DvInt1);
+ return typeof(sbyte);
case DataKind.U1:
return typeof(byte);
case DataKind.I2:
- return typeof(DvInt2);
+ return typeof(short);
case DataKind.U2:
return typeof(ushort);
case DataKind.I4:
- return typeof(DvInt4);
+ return typeof(int);
case DataKind.U4:
return typeof(uint);
case DataKind.I8:
- return typeof(DvInt8);
+ return typeof(long);
case DataKind.U8:
return typeof(ulong);
case DataKind.R4:
@@ -161,15 +161,15 @@ public static Type ToType(this DataKind kind)
case DataKind.R8:
return typeof(Double);
case DataKind.TX:
- return typeof(DvText);
+ return typeof(ReadOnlyMemory);
case DataKind.BL:
- return typeof(DvBool);
+ return typeof(bool);
case DataKind.TS:
- return typeof(DvTimeSpan);
+ return typeof(TimeSpan);
case DataKind.DT:
- return typeof(DvDateTime);
+ return typeof(DateTime);
case DataKind.DZ:
- return typeof(DvDateTimeZone);
+ return typeof(DateTimeOffset);
case DataKind.UG:
return typeof(UInt128);
}
@@ -185,35 +185,35 @@ public static bool TryGetDataKind(this Type type, out DataKind kind)
Contracts.CheckValueOrNull(type);
// REVIEW: Make this more efficient. Should we have a global dictionary?
- if (type == typeof(DvInt1) || type == typeof(sbyte) || type == typeof(sbyte?))
+ if (type == typeof(sbyte))
kind = DataKind.I1;
- else if (type == typeof(byte) || type == typeof(byte?))
+ else if (type == typeof(byte))
kind = DataKind.U1;
- else if (type == typeof(DvInt2)|| type== typeof(short) || type == typeof(short?))
+ else if (type == typeof(short))
kind = DataKind.I2;
- else if (type == typeof(ushort)|| type == typeof(ushort?))
+ else if (type == typeof(ushort))
kind = DataKind.U2;
- else if (type == typeof(DvInt4) || type == typeof(int)|| type == typeof(int?))
+ else if (type == typeof(int))
kind = DataKind.I4;
- else if (type == typeof(uint)|| type == typeof(uint?))
+ else if (type == typeof(uint))
kind = DataKind.U4;
- else if (type == typeof(DvInt8) || type==typeof(long)|| type == typeof(long?))
+ else if (type == typeof(long))
kind = DataKind.I8;
- else if (type == typeof(ulong)|| type == typeof(ulong?))
+ else if (type == typeof(ulong))
kind = DataKind.U8;
- else if (type == typeof(Single)|| type == typeof(Single?))
+ else if (type == typeof(Single))
kind = DataKind.R4;
- else if (type == typeof(Double)|| type == typeof(Double?))
+ else if (type == typeof(Double))
kind = DataKind.R8;
- else if (type == typeof(DvText))
+ else if (type == typeof(ReadOnlyMemory) || type == typeof(string))
kind = DataKind.TX;
- else if (type == typeof(DvBool) || type == typeof(bool) || type == typeof(bool?))
+ else if (type == typeof(bool))
kind = DataKind.BL;
- else if (type == typeof(DvTimeSpan))
+ else if (type == typeof(TimeSpan))
kind = DataKind.TS;
- else if (type == typeof(DvDateTime))
+ else if (type == typeof(DateTime))
kind = DataKind.DT;
- else if (type == typeof(DvDateTimeZone))
+ else if (type == typeof(DateTimeOffset))
kind = DataKind.DZ;
else if (type == typeof(UInt128))
kind = DataKind.UG;
diff --git a/src/Microsoft.ML.Core/Data/DateTime.cs b/src/Microsoft.ML.Core/Data/DateTime.cs
deleted file mode 100644
index d11be2a494..0000000000
--- a/src/Microsoft.ML.Core/Data/DateTime.cs
+++ /dev/null
@@ -1,550 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System;
-using Microsoft.ML.Runtime.Internal.Utilities;
-
-namespace Microsoft.ML.Runtime.Data
-{
- using Conditional = System.Diagnostics.ConditionalAttribute;
- using SysDateTime = System.DateTime;
- using SysDateTimeOffset = System.DateTimeOffset;
- using SysTimeSpan = System.TimeSpan;
-
- ///
- /// A struct to represent a DateTime column type
- ///
- public struct DvDateTime : IEquatable, IComparable
- {
- public const long MaxTicks = 3155378975999999999;
- private readonly DvInt8 _ticks;
-
- ///
- /// This ctor initializes _ticks to the value of sdt.Ticks, and ignores its DateTimeKind value.
- ///
- public DvDateTime(SysDateTime sdt)
- {
- _ticks = sdt.Ticks;
- AssertValid();
- }
-
- ///
- /// This ctor accepts any value for ticks, but produces an NA if ticks is out of the legal range.
- ///
- public DvDateTime(DvInt8 ticks)
- {
- if ((ulong)ticks.RawValue > MaxTicks)
- _ticks = DvInt8.NA;
- else
- _ticks = ticks;
- AssertValid();
- }
-
- [Conditional("DEBUG")]
- internal void AssertValid()
- {
- Contracts.Assert((ulong)_ticks.RawValue <= MaxTicks || _ticks.IsNA);
- }
-
- public DvInt8 Ticks
- {
- get
- {
- AssertValid();
- return _ticks;
- }
- }
-
- // REVIEW: Add more System.DateTime members returning their corresponding 'Dv' types (task 4255).
- ///
- /// Gets the date component of this object.
- ///
- public DvDateTime Date
- {
- get
- {
- AssertValid();
- if (IsNA)
- return NA;
- return new DvDateTime(GetSysDateTime().Date);
- }
- }
-
- ///
- /// Gets a DvDateTime object representing the current UTC date and time.
- ///
- public static DvDateTime UtcNow { get { return new DvDateTime(SysDateTime.UtcNow); } }
-
- public bool IsNA
- {
- get
- {
- AssertValid();
- return (ulong)_ticks.RawValue > MaxTicks;
- }
- }
-
- public static DvDateTime NA
- {
- get { return new DvDateTime(DvInt8.NA); }
- }
-
- public static explicit operator SysDateTime?(DvDateTime dvDt)
- {
- if (dvDt.IsNA)
- return null;
- return dvDt.GetSysDateTime();
- }
-
- ///
- /// Creates a new DvDateTime with the same number of ticks as in sdt, ignoring its DateTimeKind value.
- ///
- public static implicit operator DvDateTime(SysDateTime sdt)
- {
- return new DvDateTime(sdt);
- }
-
- public static implicit operator DvDateTime(SysDateTime? sdt)
- {
- if (sdt == null)
- return DvDateTime.NA;
- return new DvDateTime(sdt.Value);
- }
-
- public override string ToString()
- {
- AssertValid();
- if (IsNA)
- return "";
- return GetSysDateTime().ToString("o");
- }
-
- internal SysDateTime GetSysDateTime()
- {
- AssertValid();
- Contracts.Assert(!IsNA);
- return new SysDateTime(_ticks.RawValue);
- }
-
- public bool Equals(DvDateTime other)
- {
- return _ticks.RawValue == other._ticks.RawValue;
- }
-
- public override bool Equals(object obj)
- {
- return obj is DvDateTime && Equals((DvDateTime)obj);
- }
-
- public int CompareTo(DvDateTime other)
- {
- if (_ticks.RawValue == other._ticks.RawValue)
- return 0;
- return _ticks.RawValue < other._ticks.RawValue ? -1 : 1;
- }
-
- public override int GetHashCode()
- {
- return _ticks.GetHashCode();
- }
- }
-
- ///
- /// A struct to represent a DateTimeZone column type.
- ///
- public struct DvDateTimeZone : IEquatable, IComparable
- {
- public const long TicksPerMinute = 600000000;
- public const long MaxMinutesOffset = 840;
- public const long MinMinutesOffset = -840;
-
- // Stores the UTC date-time (convert to clock time by adding the offset).
- private readonly DvDateTime _dateTime;
- // Store the offset in minutes.
- private readonly DvInt2 _offset;
-
- // This assumes (and asserts) that the dt/offset combination is valid.
- // Callers should do the validation.
- private DvDateTimeZone(DvDateTime dt, DvInt2 offset)
- {
- _dateTime = dt;
- _offset = offset;
- AssertValid();
- }
-
- ///
- /// Given a number of ticks for the date time portion and a number of minutes for
- /// the time zone offset, this constructs a new DvDateTimeZone. If anything is invalid,
- /// it produces NA.
- ///
- /// The number of clock ticks in the date time portion
- /// The time zone offset in minutes
- public DvDateTimeZone(DvInt8 ticks, DvInt2 offset)
- {
- var dt = new DvDateTime(ticks);
- if (dt.IsNA || offset.IsNA || MinMinutesOffset > offset.RawValue || offset.RawValue > MaxMinutesOffset)
- {
- _dateTime = DvDateTime.NA;
- _offset = DvInt2.NA;
- }
- else
- {
- _offset = offset;
- _dateTime = ValidateDate(dt, ref _offset);
- }
- AssertValid();
- }
-
- public DvDateTimeZone(SysDateTimeOffset dto)
- {
- // Since it is constructed from a SysDateTimeOffset, all the validations should work.
- var success = TryValidateOffset(dto.Offset.Ticks, out _offset);
- Contracts.Assert(success);
- _dateTime = ValidateDate(new DvDateTime(dto.DateTime), ref _offset);
- Contracts.Assert(!_dateTime.IsNA);
- Contracts.Assert(!_offset.IsNA);
- AssertValid();
- }
-
- ///
- /// Constructs a DvDateTimeZone from a clock date-time and a time zone offset from UTC.
- ///
- /// The clock time
- /// The offset
- public DvDateTimeZone(DvDateTime dt, DvTimeSpan offset)
- {
- if (dt.IsNA || offset.IsNA || !TryValidateOffset(offset.Ticks, out _offset))
- {
- _dateTime = DvDateTime.NA;
- _offset = DvInt2.NA;
- }
- else
- _dateTime = ValidateDate(dt, ref _offset);
- AssertValid();
- }
-
- ///
- /// This method takes a DvDateTime representing clock time, and a TimeSpan representing an offset,
- /// validates that both the clock time and the UTC time (which is the clock time minus the offset)
- /// are within the valid range, and returns a DvDateTime representing the UTC time (dateTime-offset).
- ///
- /// The clock time
- /// The offset. This value is assumed to be validated as a legal offset:
- /// a value in whole minutes, between -14 and 14 hours.
- /// The UTC DvDateTime representing the input clock time minus the offset
- private static DvDateTime ValidateDate(DvDateTime dateTime, ref DvInt2 offset)
- {
- Contracts.Assert(!dateTime.IsNA);
- Contracts.Assert(!offset.IsNA);
-
- // Validate that both the UTC and clock times are legal.
- Contracts.Assert(MinMinutesOffset <= offset.RawValue && offset.RawValue <= MaxMinutesOffset);
- var offsetTicks = offset.RawValue * TicksPerMinute;
- // This operation cannot overflow because offset should have already been validated to be within
- // 14 hours and the DateTime instance is more than that distance from the boundaries of Int64.
- long utcTicks = dateTime.Ticks.RawValue - offsetTicks;
- var dvdt = new DvDateTime(utcTicks);
- if (dvdt.IsNA)
- offset = DvInt2.NA;
- return dvdt;
- }
-
- ///
- /// This method takes a TimeSpan offset, validates that it is a legal offset for DvDateTimeZone (i.e.
- /// in whole minutes, and between -14 and 14 hours), and returns the offset in number of minutes.
- ///
- ///
- ///
- ///
- private static bool TryValidateOffset(DvInt8 offsetTicks, out DvInt2 offset)
- {
- if (offsetTicks.IsNA || offsetTicks.RawValue % TicksPerMinute != 0)
- {
- offset = DvInt2.NA;
- return false;
- }
-
- long mins = offsetTicks.RawValue / TicksPerMinute;
- short res = (short)mins;
- if (res != mins || res > MaxMinutesOffset || res < MinMinutesOffset)
- {
- offset = DvInt2.NA;
- return false;
- }
- offset = res;
- Contracts.Assert(!offset.IsNA);
- return true;
- }
-
- [Conditional("DEBUG")]
- private void AssertValid()
- {
- _dateTime.AssertValid();
- if (_dateTime.IsNA)
- Contracts.Assert(_offset.IsNA);
- else
- {
- Contracts.Assert(MinMinutesOffset <= _offset.RawValue && _offset.RawValue <= MaxMinutesOffset);
- Contracts.Assert((ulong)(_dateTime.Ticks.RawValue + _offset.RawValue * TicksPerMinute)
- <= (ulong)DvDateTime.MaxTicks);
- }
- }
-
- public DvDateTime ClockDateTime
- {
- get
- {
- AssertValid();
- if (_dateTime.IsNA)
- return DvDateTime.NA;
- var res = new DvDateTime(_dateTime.Ticks.RawValue + _offset.RawValue * TicksPerMinute);
- Contracts.Assert(!res.IsNA);
- return res;
- }
- }
-
- ///
- /// Gets the UTC date and time.
- ///
- public DvDateTime UtcDateTime
- {
- get
- {
- AssertValid();
- if (IsNA)
- return DvDateTime.NA;
- return _dateTime;
- }
- }
-
- ///
- /// Gets the offset as a time span.
- ///
- public DvTimeSpan Offset
- {
- get
- {
- AssertValid();
- if (_offset.IsNA)
- return DvTimeSpan.NA;
- return new DvTimeSpan(_offset.RawValue * TicksPerMinute);
- }
- }
-
- ///
- /// Gets the offset in minutes.
- ///
- public DvInt2 OffsetMinutes
- {
- get
- {
- AssertValid();
- return _offset;
- }
- }
-
- // REVIEW: Add more System.DateTimeOffset members returning their corresponding 'Dv' types (task 4255).
-
- ///
- /// Gets the date component of the ClockDateTime.
- ///
- public DvDateTime ClockDate
- {
- get
- {
- AssertValid();
- if (IsNA)
- return DvDateTime.NA;
- return ClockDateTime.Date;
- }
- }
-
- ///
- /// Gets the date component of the UtcDateTime.
- ///
- public DvDateTime UtcDate
- {
- get
- {
- AssertValid();
- if (IsNA)
- return DvDateTime.NA;
- return _dateTime.Date;
- }
- }
-
- ///
- /// Gets a DvDateTimeZone object representing the current UTC date and time (with offset=0).
- ///
- public static DvDateTimeZone UtcNow { get { return new DvDateTimeZone(SysDateTimeOffset.UtcNow); } }
-
- public bool IsNA
- {
- get
- {
- AssertValid();
- return _dateTime.IsNA;
- }
- }
-
- // The missing value for DvDateTimeZone is represented by a DvDateTimeZone with _dateTime = DvDateTime.NA
- // and _offset = 0.
- public static DvDateTimeZone NA
- {
- get { return new DvDateTimeZone(DvDateTime.NA, DvInt2.NA); }
- }
-
- public static explicit operator SysDateTimeOffset?(DvDateTimeZone dvDto)
- {
- if (dvDto.IsNA)
- return null;
- return dvDto.GetSysDateTimeOffset();
- }
-
- public static implicit operator DvDateTimeZone(SysDateTimeOffset sdto)
- {
- return new DvDateTimeZone(sdto);
- }
-
- public static implicit operator DvDateTimeZone(SysDateTimeOffset? sdto)
- {
- if (sdto == null)
- return DvDateTimeZone.NA;
- return new DvDateTimeZone(sdto.Value);
- }
-
- public override string ToString()
- {
- AssertValid();
- if (IsNA)
- return "";
-
- return GetSysDateTimeOffset().ToString("o");
- }
-
- private DateTimeOffset GetSysDateTimeOffset()
- {
- AssertValid();
- Contracts.Assert(!IsNA);
- return new SysDateTimeOffset(ClockDateTime.GetSysDateTime(), new TimeSpan(0, _offset.RawValue, 0));
- }
-
- ///
- /// Compare two values for equality. Note that this differs from System.DateTimeOffset's
- /// definition of Equals, which only compares the UTC values, not the offsets.
- ///
- public bool Equals(DvDateTimeZone other)
- {
- return _offset.RawValue == other._offset.RawValue && _dateTime.Equals(other._dateTime);
- }
-
- public override bool Equals(object obj)
- {
- return obj is DvDateTimeZone && Equals((DvDateTimeZone)obj);
- }
-
- ///
- /// Compare two values for ordering. Note that this differs from System.DateTimeOffset's
- /// definition of CompareTo, which only compares the UTC values, not the offsets.
- ///
- public int CompareTo(DvDateTimeZone other)
- {
- AssertValid();
- other.AssertValid();
-
- int res = _dateTime.CompareTo(other._dateTime);
- if (res != 0)
- return res;
- if (_offset.RawValue == other._offset.RawValue)
- return 0;
- return _offset.RawValue < other._offset.RawValue ? -1 : 1;
- }
-
- public override int GetHashCode()
- {
- return Hashing.CombineHash(_dateTime.GetHashCode(), _offset.GetHashCode());
- }
- }
-
- ///
- /// A struct to represent a DateTime column type
- ///
- public struct DvTimeSpan : IEquatable, IComparable
- {
- private readonly DvInt8 _ticks;
-
- public DvInt8 Ticks { get { return _ticks; } }
-
- public DvTimeSpan(DvInt8 ticks)
- {
- _ticks = ticks;
- }
-
- public DvTimeSpan(SysTimeSpan sts)
- {
- _ticks = sts.Ticks;
- }
-
- public DvTimeSpan(SysTimeSpan? sts)
- {
- _ticks = sts != null ? sts.GetValueOrDefault().Ticks : DvInt8.NA;
- }
-
- public bool IsNA
- {
- get { return _ticks.IsNA; }
- }
-
- public static DvTimeSpan NA
- {
- get { return new DvTimeSpan(DvInt8.NA); }
- }
-
- public static explicit operator SysTimeSpan?(DvTimeSpan ts)
- {
- if (ts.IsNA)
- return null;
- return new SysTimeSpan(ts._ticks.RawValue);
- }
-
- public static implicit operator DvTimeSpan(SysTimeSpan sts)
- {
- return new DvTimeSpan(sts);
- }
-
- public static implicit operator DvTimeSpan(SysTimeSpan? sts)
- {
- return new DvTimeSpan(sts);
- }
-
- public override string ToString()
- {
- if (IsNA)
- return "";
- return new SysTimeSpan(_ticks.RawValue).ToString("c");
- }
-
- public bool Equals(DvTimeSpan other)
- {
- return _ticks.RawValue == other._ticks.RawValue;
- }
-
- public override bool Equals(object obj)
- {
- return obj is DvTimeSpan && Equals((DvTimeSpan)obj);
- }
-
- public int CompareTo(DvTimeSpan other)
- {
- if (_ticks.RawValue == other._ticks.RawValue)
- return 0;
- return _ticks.RawValue < other._ticks.RawValue ? -1 : 1;
- }
-
- public override int GetHashCode()
- {
- return _ticks.GetHashCode();
- }
- }
-}
diff --git a/src/Microsoft.ML.Core/Data/DvBool.cs b/src/Microsoft.ML.Core/Data/DvBool.cs
deleted file mode 100644
index f17cb596d4..0000000000
--- a/src/Microsoft.ML.Core/Data/DvBool.cs
+++ /dev/null
@@ -1,226 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System;
-using System.Runtime.CompilerServices;
-
-namespace Microsoft.ML.Runtime.Data
-{
- using BL = DvBool;
- using R4 = Single;
- using R8 = Double;
-
- public struct DvBool : IEquatable, IComparable
- {
- private const byte _false = 0;
- private const byte _true = 1;
- private const byte _na = 128;
- public const byte RawNA = _na;
-
- private byte _value;
-
- public static BL False { get { BL res; res._value = _false; return res; } }
- public static BL True { get { BL res; res._value = _true; return res; } }
- public static BL NA { get { BL res; res._value = _na; return res; } }
-
- ///
- /// Property to return the raw value.
- ///
- public byte RawValue
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return _value; }
- }
-
- ///
- /// Static method to return the raw value. This is more convenient than the
- /// property in code-generation scenarios.
- ///
- public static byte GetRawBits(BL a)
- {
- return a._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private DvBool(int value)
- {
- Contracts.Assert(value == _true || value == _false || value == _na);
- _value = (byte)value;
- }
-
- ///
- /// Returns whether this value is false.
- ///
- public bool IsFalse
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return _value == _false; }
- }
-
- ///
- /// Returns whether this value is true.
- ///
- public bool IsTrue
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return _value == _true; }
- }
-
- ///
- /// Returns whether this value is NA.
- ///
- public bool IsNA
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return _value > _true; }
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator BL(bool value)
- {
- BL res;
- res._value = value ? _true : _false;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator BL(bool? value)
- {
- BL res;
- res._value = value == null ? _na : value.GetValueOrDefault() ? _true : _false;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator bool(BL value)
- {
- switch (value._value)
- {
- case _false:
- return false;
- case _true:
- return true;
- default:
- throw Contracts.ExceptValue(nameof(value), "NA cast to bool");
- }
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator bool?(BL value)
- {
- switch (value._value)
- {
- case _false:
- return false;
- case _true:
- return true;
- default:
- return null;
- }
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator R4(BL value)
- {
- if (value._value <= _true)
- return value._value;
- return Single.NaN;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator R8(BL value)
- {
- if (value._value <= _true)
- return value._value;
- return Double.NaN;
- }
-
- public override int GetHashCode()
- {
- return _value.GetHashCode();
- }
-
- public override bool Equals(object obj)
- {
- if (obj is BL)
- return _value == ((BL)obj)._value;
- return false;
- }
-
- public bool Equals(BL other)
- {
- // Note that if one or both are "non-standard" NA values, this
- // could return false. Theoretically, that should never happen,
- // but unsafe code could cause it.
- return _value == other._value;
- }
-
- public int CompareTo(BL other)
- {
- // Note that if one or both are "non-standard" NA values, this could produce unexpected comparisons.
- // Theoretically, that should never happen, but unsafe code could cause it.
- Contracts.Assert(unchecked((sbyte)RawNA) < (sbyte)_false);
- if (_value == other._value)
- return 0;
- return (sbyte)_value < (sbyte)other._value ? -1 : 1;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator ==(BL a, BL b)
- {
- if (a._value <= _true && b._value <= _true)
- return a._value == b._value ? True : False;
- return NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator !=(BL a, BL b)
- {
- if (a._value <= _true && b._value <= _true)
- return a._value != b._value ? True : False;
- return NA;
- }
-
- public override string ToString()
- {
- if (_value == _false)
- return "False";
- if (_value == _true)
- return "True";
- return "NA";
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator !(BL a)
- {
- if (a._value <= _true)
- a._value ^= 1;
- return a;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator |(BL a, BL b)
- {
- if (a._value == _true)
- return a;
- if (b._value == _true)
- return b;
- if (a._value != _false)
- return a;
- return b;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator &(BL a, BL b)
- {
- if (a._value == _false)
- return a;
- if (b._value == _false)
- return b;
- if (a._value != _true)
- return a;
- return b;
- }
- }
-}
diff --git a/src/Microsoft.ML.Core/Data/DvInt1.cs b/src/Microsoft.ML.Core/Data/DvInt1.cs
deleted file mode 100644
index ced2a4688d..0000000000
--- a/src/Microsoft.ML.Core/Data/DvInt1.cs
+++ /dev/null
@@ -1,264 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System;
-using System.Runtime.CompilerServices;
-
-namespace Microsoft.ML.Runtime.Data
-{
- using BL = DvBool;
- using I2 = DvInt2;
- using I4 = DvInt4;
- using I8 = DvInt8;
- using IX = DvInt1;
- using R4 = Single;
- using R8 = Double;
- using RawI8 = Int64;
- using RawIX = SByte;
-
- public struct DvInt1 : IEquatable, IComparable
- {
- public const RawIX RawNA = RawIX.MinValue;
-
- // Ideally this would be readonly. However, note that this struct has no
- // ctor, but instead only has conversion operators. The implicit conversion
- // operator from RawIX to DvIX performs better than an equivalent ctor,
- // and the conversion operator must assign the _value field.
- private RawIX _value;
-
- ///
- /// Property to return the raw value.
- ///
- public RawIX RawValue
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return _value; }
- }
-
- ///
- /// Static method to return the raw value. This is more convenient than the
- /// property in code-generation scenarios.
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static RawIX GetRawBits(IX a)
- {
- return a._value;
- }
-
- public static IX NA
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return RawNA; }
- }
-
- public bool IsNA
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return _value == RawNA; }
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(RawIX value)
- {
- IX res;
- res._value = value;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(RawIX? value)
- {
- IX res;
- res._value = value ?? RawNA;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator RawIX(IX value)
- {
- if (value._value == RawNA)
- throw Contracts.ExceptValue(nameof(value), "NA cast to sbyte");
- return value._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator RawIX?(IX value)
- {
- if (value._value == RawNA)
- return null;
- return value._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(BL a)
- {
- if (a.IsNA)
- return RawNA;
- return (RawIX)a.RawValue;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(I2 a)
- {
- RawIX res = (RawIX)a.RawValue;
- if (res != a.RawValue)
- return RawNA;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(I4 a)
- {
- RawIX res = (RawIX)a.RawValue;
- if (res != a.RawValue)
- return RawNA;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(I8 a)
- {
- RawIX res = (RawIX)a.RawValue;
- if (res != a.RawValue)
- return RawNA;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(R4 a)
- {
- return (IX)(R8)a;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator R4(IX a)
- {
- if (a._value == RawNA)
- return R4.NaN;
- return (R4)a._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(R8 a)
- {
- const R8 lim = -(R8)RawIX.MinValue;
- if (-lim < a && a < lim)
- {
- RawIX n = (RawIX)a;
-#if DEBUG
- Contracts.Assert(!a.IsNA());
- Contracts.Assert(n != RawNA);
- RawI8 nn = (RawI8)a;
- Contracts.Assert(nn == n);
- if (a >= 0)
- Contracts.Assert(a - 1 < n & n <= a);
- else
- Contracts.Assert(a <= n & n < a + 1);
-#endif
- return n;
- }
-
- return RawNA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator R8(IX a)
- {
- if (a._value == RawNA)
- return R8.NaN;
- return (R8)a._value;
- }
-
- public override int GetHashCode()
- {
- return _value.GetHashCode();
- }
-
- public override bool Equals(object obj)
- {
- if (obj is IX)
- return _value == ((IX)obj)._value;
- return false;
- }
-
- public bool Equals(IX other)
- {
- return _value == other._value;
- }
-
- public int CompareTo(IX other)
- {
- if (_value == other._value)
- return 0;
- return _value < other._value ? -1 : 1;
- }
-
- public override string ToString()
- {
- if (_value == RawNA)
- return "NA";
- return _value.ToString();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator ==(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av == bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator !=(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av != bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator <(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av < bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator <=(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av <= bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator >=(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av >= bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator >(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av > bv ? BL.True : BL.False;
- return BL.NA;
- }
- }
-}
diff --git a/src/Microsoft.ML.Core/Data/DvInt2.cs b/src/Microsoft.ML.Core/Data/DvInt2.cs
deleted file mode 100644
index 33599f6468..0000000000
--- a/src/Microsoft.ML.Core/Data/DvInt2.cs
+++ /dev/null
@@ -1,263 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System;
-using System.Runtime.CompilerServices;
-
-namespace Microsoft.ML.Runtime.Data
-{
- using BL = DvBool;
- using I1 = DvInt1;
- using I4 = DvInt4;
- using I8 = DvInt8;
- using IX = DvInt2;
- using R4 = Single;
- using R8 = Double;
- using RawI8 = Int64;
- using RawIX = Int16;
-
- public struct DvInt2 : IEquatable, IComparable
- {
- public const RawIX RawNA = RawIX.MinValue;
-
- // Ideally this would be readonly. However, note that this struct has no
- // ctor, but instead only has conversion operators. The implicit conversion
- // operator from RawIX to DvIX performs better than an equivalent ctor,
- // and the conversion operator must assign the _value field.
- private RawIX _value;
-
- ///
- /// Property to return the raw value.
- ///
- public RawIX RawValue
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return _value; }
- }
-
- ///
- /// Static method to return the raw value. This is more convenient than the
- /// property in code-generation scenarios.
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static RawIX GetRawBits(IX a)
- {
- return a._value;
- }
-
- public static IX NA
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return RawNA; }
- }
-
- public bool IsNA
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return _value == RawNA; }
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(RawIX value)
- {
- IX res;
- res._value = value;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(RawIX? value)
- {
- IX res;
- res._value = value ?? RawNA;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator RawIX(IX value)
- {
- if (value._value == RawNA)
- throw Contracts.ExceptValue(nameof(value), "NA cast to short");
- return value._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator RawIX?(IX value)
- {
- if (value._value == RawNA)
- return null;
- return value._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(BL a)
- {
- if (a.IsNA)
- return RawNA;
- return (RawIX)a.RawValue;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(I1 a)
- {
- if (a.IsNA)
- return RawNA;
- return (RawIX)a.RawValue;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(I4 a)
- {
- RawIX res = (RawIX)a.RawValue;
- if (res != a.RawValue)
- return RawNA;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(I8 a)
- {
- RawIX res = (RawIX)a.RawValue;
- if (res != a.RawValue)
- return RawNA;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(R4 a)
- {
- return (IX)(R8)a;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator R4(IX a)
- {
- if (a._value == RawNA)
- return R4.NaN;
- return (R4)a._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(R8 a)
- {
- const R8 lim = -(R8)RawIX.MinValue;
- if (-lim < a && a < lim)
- {
- RawIX n = (RawIX)a;
-#if DEBUG
- Contracts.Assert(!a.IsNA());
- Contracts.Assert(n != RawNA);
- RawI8 nn = (RawI8)a;
- Contracts.Assert(nn == n);
- if (a >= 0)
- Contracts.Assert(a - 1 < n & n <= a);
- else
- Contracts.Assert(a <= n & n < a + 1);
-#endif
- return n;
- }
-
- return RawNA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator R8(IX a)
- {
- if (a._value == RawNA)
- return R8.NaN;
- return (R8)a._value;
- }
-
- public override int GetHashCode()
- {
- return _value.GetHashCode();
- }
-
- public override bool Equals(object obj)
- {
- if (obj is IX)
- return _value == ((IX)obj)._value;
- return false;
- }
-
- public bool Equals(IX other)
- {
- return _value == other._value;
- }
-
- public int CompareTo(IX other)
- {
- if (_value == other._value)
- return 0;
- return _value < other._value ? -1 : 1;
- }
-
- public override string ToString()
- {
- if (_value == RawNA)
- return "NA";
- return _value.ToString();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator ==(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av == bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator !=(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av != bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator <(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av < bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator <=(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av <= bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator >=(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av >= bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator >(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av > bv ? BL.True : BL.False;
- return BL.NA;
- }
- }
-}
diff --git a/src/Microsoft.ML.Core/Data/DvInt4.cs b/src/Microsoft.ML.Core/Data/DvInt4.cs
deleted file mode 100644
index 23c7e89242..0000000000
--- a/src/Microsoft.ML.Core/Data/DvInt4.cs
+++ /dev/null
@@ -1,456 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System;
-using System.Runtime.CompilerServices;
-
-namespace Microsoft.ML.Runtime.Data
-{
- using BL = DvBool;
- using I1 = DvInt1;
- using I2 = DvInt2;
- using I8 = DvInt8;
- using IX = DvInt4;
- using R4 = Single;
- using R8 = Double;
- using RawI8 = Int64;
- using RawIX = Int32;
-
- public struct DvInt4 : IEquatable, IComparable
- {
- public const RawIX RawNA = RawIX.MinValue;
-
- // Ideally this would be readonly. However, note that this struct has no
- // ctor, but instead only has conversion operators. The implicit conversion
- // operator from RawIX to DvIX performs better than an equivalent ctor,
- // and the conversion operator must assign the _value field.
- private RawIX _value;
-
- ///
- /// Property to return the raw value.
- ///
- public RawIX RawValue
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return _value; }
- }
-
- ///
- /// Static method to return the raw value. This is more convenient than the
- /// property in code-generation scenarios.
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static RawIX GetRawBits(IX a)
- {
- return a._value;
- }
-
- public static IX NA
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return RawNA; }
- }
-
- public bool IsNA
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return _value == RawNA; }
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(RawIX value)
- {
- IX res;
- res._value = value;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(RawIX? value)
- {
- IX res;
- res._value = value ?? RawNA;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator RawIX(IX value)
- {
- if (value._value == RawNA)
- throw Contracts.ExceptValue(nameof(value), "NA cast to int");
- return value._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator RawIX?(IX value)
- {
- if (value._value == RawNA)
- return null;
- return value._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(BL a)
- {
- if (a.IsNA)
- return RawNA;
- return (RawIX)a.RawValue;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(I1 a)
- {
- if (a.IsNA)
- return RawNA;
- return (RawIX)a.RawValue;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(I2 a)
- {
- if (a.IsNA)
- return RawNA;
- return (RawIX)a.RawValue;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(I8 a)
- {
- RawIX res = (RawIX)a.RawValue;
- if (res != a.RawValue)
- return RawNA;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(R4 a)
- {
- return (IX)(R8)a;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator R4(IX a)
- {
- if (a._value == RawNA)
- return R4.NaN;
- return (R4)a._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(R8 a)
- {
- const R8 lim = -(R8)RawIX.MinValue;
- if (-lim < a && a < lim)
- {
- RawIX n = (RawIX)a;
-#if DEBUG
- Contracts.Assert(!a.IsNA());
- Contracts.Assert(n != RawNA);
- RawI8 nn = (RawI8)a;
- Contracts.Assert(nn == n);
- if (a >= 0)
- Contracts.Assert(a - 1 < n & n <= a);
- else
- Contracts.Assert(a <= n & n < a + 1);
-#endif
- return n;
- }
-
- return RawNA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator R8(IX a)
- {
- if (a._value == RawNA)
- return R8.NaN;
- return (R8)a._value;
- }
-
- public override int GetHashCode()
- {
- return _value.GetHashCode();
- }
-
- public override bool Equals(object obj)
- {
- if (obj is IX)
- return _value == ((IX)obj)._value;
- return false;
- }
-
- public bool Equals(IX other)
- {
- return _value == other._value;
- }
-
- public int CompareTo(IX other)
- {
- if (_value == other._value)
- return 0;
- return _value < other._value ? -1 : 1;
- }
-
- public override string ToString()
- {
- if (_value == RawNA)
- return "NA";
- return _value.ToString();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator ==(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av == bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator !=(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av != bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator <(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av < bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator <=(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av <= bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator >=(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av >= bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator >(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av > bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX operator -(IX a)
- {
- return -a._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX operator +(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- {
- var res = av + bv;
- // Overflow happens iff the sign of the result is different than both source values.
- if ((av ^ res) >= 0)
- return res;
- if ((bv ^ res) >= 0)
- return res;
- }
- return RawNA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX operator -(IX a, IX b)
- {
- var av = a._value;
- var bv = -b._value;
- if (av != RawNA && bv != RawNA)
- {
- var res = av + bv;
- // Overflow happens iff the sign of the result is different than both source values.
- if ((av ^ res) >= 0)
- return res;
- if ((bv ^ res) >= 0)
- return res;
- }
- return RawNA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX operator *(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- {
- RawI8 res = (RawI8)av * bv;
- if (-RawIX.MaxValue <= res && res <= RawIX.MaxValue)
- return (RawIX)res;
- }
- return RawNA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX operator /(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA && bv != 0)
- return av / bv;
- return RawNA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX operator %(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA && bv != 0)
- return av % bv;
- return RawNA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX Abs(IX a)
- {
- // Can't use Math.Abs since it throws on the RawNA value.
- return a._value >= 0 ? a._value : -a._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX Sign(IX a)
- {
- var val = a._value;
- var neg = -val;
- // This works for NA since -RawNA == RawNA.
- return val > neg ? +1 : val < neg ? -1 : val;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX Min(IX a, IX b)
- {
- var v1 = a._value;
- var v2 = b._value;
- // This works for NA since RawNA == RawIX.MinValue.
- return v1 <= v2 ? v1 : v2;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public IX Min(IX b)
- {
- var v1 = _value;
- var v2 = b._value;
- // This works for NA since RawNA == RawIX.MinValue.
- return v1 <= v2 ? v1 : v2;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX Max(IX a, IX b)
- {
- var v1 = a._value;
- var v2 = b._value;
- // This works for NA since RawNA - 1 == RawIX.MaxValue.
- return v1 - 1 >= v2 - 1 ? v1 : v2;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public IX Max(IX b)
- {
- var v1 = _value;
- var v2 = b._value;
- // This works for NA since RawNA - 1 == RawIX.MaxValue.
- return v1 - 1 >= v2 - 1 ? v1 : v2;
- }
-
- ///
- /// Raise a to the b power. Special cases:
- /// * 1^NA => 1
- /// * NA^0 => 1
- ///
- public static IX Pow(IX a, IX b)
- {
- var av = a.RawValue;
- var bv = b.RawValue;
-
- if (av == 1)
- return 1;
- switch (bv)
- {
- case 0:
- return 1;
- case 1:
- return av;
- case 2:
- return a * a;
- case RawNA:
- return RawNA;
- }
- if (av == -1)
- return (bv & 1) == 0 ? 1 : -1;
- if (bv < 0)
- return RawNA;
- if (av == RawNA)
- return RawNA;
-
- // Since the abs of the base is at least two, the exponent must be less than 31.
- if (bv >= 31)
- return RawNA;
-
- bool neg = false;
- if (av < 0)
- {
- av = -av;
- neg = (bv & 1) != 0;
- }
- Contracts.Assert(av >= 2);
-
- // Since the exponent is at least three, the base must be <= 1290.
- Contracts.Assert(bv >= 3);
- if (av > 1290)
- return RawNA;
-
- // REVIEW: Should we use a checked context and exception catching like I8 does?
- ulong u = (ulong)(uint)av;
- ulong result = 1;
- for (; ; )
- {
- if ((bv & 1) != 0 && (result *= u) > RawIX.MaxValue)
- return RawNA;
- bv >>= 1;
- if (bv == 0)
- break;
- if ((u *= u) > RawIX.MaxValue)
- return RawNA;
- }
- Contracts.Assert(result <= RawIX.MaxValue);
-
- var res = (RawIX)result;
- if (neg)
- res = -res;
- return res;
- }
- }
-}
diff --git a/src/Microsoft.ML.Core/Data/DvInt8.cs b/src/Microsoft.ML.Core/Data/DvInt8.cs
deleted file mode 100644
index 3212e21fa6..0000000000
--- a/src/Microsoft.ML.Core/Data/DvInt8.cs
+++ /dev/null
@@ -1,511 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System;
-using System.Runtime.CompilerServices;
-
-namespace Microsoft.ML.Runtime.Data
-{
- using BL = DvBool;
- using I1 = DvInt1;
- using I2 = DvInt2;
- using I4 = DvInt4;
- using IX = DvInt8;
- using R4 = Single;
- using R8 = Double;
- using RawIX = Int64;
-
- public struct DvInt8 : IEquatable, IComparable
- {
- public const RawIX RawNA = RawIX.MinValue;
-
- // Ideally this would be readonly. However, note that this struct has no
- // ctor, but instead only has conversion operators. The implicit conversion
- // operator from RawIX to DvIX performs better than an equivalent ctor,
- // and the conversion operator must assign the _value field.
- private RawIX _value;
-
- ///
- /// Property to return the raw value.
- ///
- public RawIX RawValue
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return _value; }
- }
-
- ///
- /// Static method to return the raw value. This is more convenient than the
- /// property in code-generation scenarios.
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static RawIX GetRawBits(IX a)
- {
- return a._value;
- }
-
- public static IX NA
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return RawNA; }
- }
-
- public bool IsNA
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- get { return _value == RawNA; }
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(RawIX value)
- {
- IX res;
- res._value = value;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(RawIX? value)
- {
- IX res;
- res._value = value ?? RawNA;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator RawIX(IX value)
- {
- if (value._value == RawNA)
- throw Contracts.ExceptValue(nameof(value), "NA cast to long");
- return value._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator RawIX?(IX value)
- {
- if (value._value == RawNA)
- return null;
- return value._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(BL a)
- {
- if (a.IsNA)
- return RawNA;
- return (RawIX)a.RawValue;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(I1 a)
- {
- if (a.IsNA)
- return RawNA;
- return (RawIX)a.RawValue;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(I2 a)
- {
- if (a.IsNA)
- return RawNA;
- return (RawIX)a.RawValue;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static implicit operator IX(I4 a)
- {
- if (a.IsNA)
- return RawNA;
- return (RawIX)a.RawValue;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(R4 a)
- {
- return (IX)(R8)a;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator R4(IX a)
- {
- if (a._value == RawNA)
- return R4.NaN;
- return (R4)a._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator IX(R8 a)
- {
- const R8 lim = -(R8)RawIX.MinValue;
- if (-lim < a && a < lim)
- {
- RawIX n = (RawIX)a;
-#if DEBUG
- Contracts.Assert(!a.IsNA());
- Contracts.Assert(n != RawNA);
- // Note that an R8 cannot represent long.MaxValue exactly so y + 1.0 below might be the same as y.
- R8 x = a;
- R8 y = n;
- if (a < 0)
- {
- x = -x;
- y = -y;
- }
- Contracts.Assert(y <= x);
- Contracts.Assert(x < y + 1.0 | y + 1.0 == y & x == y);
-#endif
- return n;
- }
-
- return RawNA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static explicit operator R8(IX a)
- {
- if (a._value == RawNA)
- return R8.NaN;
- return (R8)a._value;
- }
-
- public override int GetHashCode()
- {
- return _value.GetHashCode();
- }
-
- public override bool Equals(object obj)
- {
- if (obj is IX)
- return _value == ((IX)obj)._value;
- return false;
- }
-
- public bool Equals(IX other)
- {
- return _value == other._value;
- }
-
- public int CompareTo(IX other)
- {
- if (_value == other._value)
- return 0;
- return _value < other._value ? -1 : 1;
- }
-
- public override string ToString()
- {
- if (_value == RawNA)
- return "NA";
- return _value.ToString();
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator ==(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av == bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator !=(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av != bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator <(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av < bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator <=(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av <= bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator >=(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av >= bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL operator >(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- return av > bv ? BL.True : BL.False;
- return BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX operator -(IX a)
- {
- return -a._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX operator +(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA)
- {
- var res = av + bv;
- // Overflow happens iff the sign of the result is different than both source values.
- if ((av ^ res) >= 0)
- return res;
- if ((bv ^ res) >= 0)
- return res;
- }
- return RawNA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX operator -(IX a, IX b)
- {
- var av = a._value;
- var bv = -b._value;
- if (av != RawNA && bv != RawNA)
- {
- var res = av + bv;
- // Overflow happens iff the sign of the result is different than both source values.
- if ((av ^ res) >= 0)
- return res;
- if ((bv ^ res) >= 0)
- return res;
- }
- return RawNA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX operator *(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- bool neg = (av ^ bv) < 0;
- if (av < 0)
- {
- if (av == RawNA)
- return RawNA;
- av = -av;
- }
- if (bv < 0)
- {
- if (bv == RawNA)
- return RawNA;
- bv = -bv;
- }
-
- // Deal with the low 32 bits.
- ulong lo1 = (ulong)av & 0x00000000FFFFFFFF;
- ulong lo2 = (ulong)bv & 0x00000000FFFFFFFF;
- RawIX res = (RawIX)(lo1 * lo2);
- if (res < 0)
- return RawNA;
-
- // Get the high 32 bits, including cross terms.
- ulong hi1 = (ulong)av >> 32;
- ulong hi2 = (ulong)bv >> 32;
- if (hi1 != 0)
- {
- // If both high words are non-zero, overflow is guaranteed.
- if (hi2 != 0)
- return RawNA;
- // Compute the cross term.
- ulong tmp = hi1 * lo2;
- if ((tmp & 0xFFFFFFFF80000000) != 0)
- return RawNA;
- res += (long)(tmp << 32);
- if (res < 0)
- return RawNA;
- }
- else if (hi2 != 0)
- {
- // Compute the cross term.
- ulong tmp = hi2 * lo1;
- if ((tmp & 0xFFFFFFFF80000000) != 0)
- return RawNA;
- res += (long)(tmp << 32);
- if (res < 0)
- return RawNA;
- }
-
- // Adjust the sign.
- if (neg)
- res = -res;
- return res;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX operator /(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA && bv != 0)
- return av / bv;
- return RawNA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX operator %(IX a, IX b)
- {
- var av = a._value;
- var bv = b._value;
- if (av != RawNA && bv != RawNA && bv != 0)
- return av % bv;
- return RawNA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX Abs(IX a)
- {
- // Can't use Math.Abs since it throws on the RawNA value.
- return a._value >= 0 ? a._value : -a._value;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX Sign(IX a)
- {
- var val = a._value;
- var neg = -val;
- // This works for NA since -RawNA == RawNA.
- return val > neg ? +1 : val < neg ? -1 : val;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX Min(IX a, IX b)
- {
- var v1 = a._value;
- var v2 = b._value;
- // This works for NA since RawNA == RawIX.MinValue.
- return v1 <= v2 ? v1 : v2;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public IX Min(IX b)
- {
- var v1 = _value;
- var v2 = b._value;
- // This works for NA since RawNA == RawIX.MinValue.
- return v1 <= v2 ? v1 : v2;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static IX Max(IX a, IX b)
- {
- var v1 = a._value;
- var v2 = b._value;
- // This works for NA since RawNA - 1 == RawIX.MaxValue.
- return v1 - 1 >= v2 - 1 ? v1 : v2;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public IX Max(IX b)
- {
- var v1 = _value;
- var v2 = b._value;
- // This works for NA since RawNA - 1 == RawIX.MaxValue.
- return v1 - 1 >= v2 - 1 ? v1 : v2;
- }
-
- ///
- /// Raise a to the b power. Special cases:
- /// * 1^NA => 1
- /// * NA^0 => 1
- ///
- public static IX Pow(IX a, IX b)
- {
- var av = a.RawValue;
- var bv = b.RawValue;
-
- if (av == 1)
- return 1;
- switch (bv)
- {
- case 0:
- return 1;
- case 1:
- return av;
- case 2:
- return a * a;
- case RawNA:
- return RawNA;
- }
- if (av == -1)
- return (bv & 1) == 0 ? 1 : -1;
- if (bv < 0)
- return RawNA;
- if (av == RawNA)
- return RawNA;
-
- // Since the abs of the base is at least two, the exponent must be less than 63.
- if (bv >= 63)
- return RawNA;
-
- bool neg = false;
- if (av < 0)
- {
- av = -av;
- neg = (bv & 1) != 0;
- }
- Contracts.Assert(av >= 2);
-
- // Since the exponent is at least three, the base must be < 2^21.
- Contracts.Assert(bv >= 3);
- if (av >= (1L << 21))
- return RawNA;
-
- long res = 1;
- long x = av;
- // REVIEW: Is the catch too slow in the overflow case?
- try
- {
- checked
- {
- for (; ; )
- {
- if ((bv & 1) != 0)
- res *= x;
- bv >>= 1;
- if (bv == 0)
- break;
- x *= x;
- }
- }
- }
- catch (OverflowException)
- {
- return RawNA;
- }
- Contracts.Assert(res > 0);
-
- if (neg)
- res = -res;
- return res;
- }
- }
-}
diff --git a/src/Microsoft.ML.Core/Data/DvText.cs b/src/Microsoft.ML.Core/Data/DvText.cs
deleted file mode 100644
index 04d3bd8918..0000000000
--- a/src/Microsoft.ML.Core/Data/DvText.cs
+++ /dev/null
@@ -1,680 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System;
-using System.Collections.Generic;
-using System.Text;
-using Microsoft.ML.Runtime.Internal.Utilities;
-
-namespace Microsoft.ML.Runtime.Data
-{
- ///
- /// A text value. This essentially wraps a portion of a string. This can distinguish between a length zero
- /// span of characters and "NA", the latter having a Length of -1.
- ///
- public struct DvText : IEquatable, IComparable
- {
- ///
- /// The fields/properties , , and are
- /// private so client code can't easily "cheat" and look outside the characters. Client
- /// code that absolutely needs access to this information can call .
- ///
- private readonly string _outerBuffer;
- private readonly int _ichMin;
-
- ///
- /// For the "NA" value, this is -1; otherwise, it is the number of characters in the text.
- ///
- public readonly int Length;
-
- private int IchLim => _ichMin + Length;
-
- ///
- /// Gets a DvText that represents "NA", aka "Missing".
- ///
- public static DvText NA => new DvText(missing: true);
-
- ///
- /// Gets an empty (zero character) DvText.
- ///
- public static DvText Empty => default(DvText);
-
- ///
- /// Gets whether this DvText contains any characters. Equivalent to Length > 0.
- ///
- public bool HasChars => Length > 0;
-
- ///
- /// Gets whether this DvText is empty (distinct from NA). Equivalent to Length == 0.
- ///
- public bool IsEmpty
- {
- get
- {
- Contracts.Assert(Length >= -1);
- return Length == 0;
- }
- }
-
- ///
- /// Gets whether this DvText represents "NA". Equivalent to Length == -1.
- ///
- public bool IsNA
- {
- get
- {
- Contracts.Assert(Length >= -1);
- return Length < 0;
- }
- }
-
- ///
- /// Gets the indicated character in the text.
- ///
- public char this[int ich]
- {
- get
- {
- Contracts.CheckParam(0 <= ich & ich < Length, nameof(ich));
- return _outerBuffer[ich + _ichMin];
- }
- }
-
- private DvText(bool missing)
- {
- _outerBuffer = null;
- _ichMin = 0;
- Length = missing ? -1 : 0;
- }
-
- ///
- /// Constructor using the indicated range of characters in the given string.
- ///
- public DvText(string text, int ichMin, int ichLim)
- {
- Contracts.CheckValueOrNull(text);
- Contracts.CheckParam(0 <= ichMin & ichMin <= Utils.Size(text), nameof(ichMin));
- Contracts.CheckParam(ichMin <= ichLim & ichLim <= Utils.Size(text), nameof(ichLim));
- Length = ichLim - ichMin;
- if (Length == 0)
- {
- _outerBuffer = null;
- _ichMin = 0;
- }
- else
- {
- _outerBuffer = text;
- _ichMin = ichMin;
- }
- }
-
- ///
- /// Constructor using the indicated string.
- ///
- public DvText(string text)
- {
- Contracts.CheckValueOrNull(text);
- Length = Utils.Size(text);
- if (Length == 0)
- _outerBuffer = null;
- else
- _outerBuffer = text;
- _ichMin = 0;
- }
-
- ///
- /// This method retrieves the raw buffer information. The only characters that should be
- /// referenced in the returned string are those between the returned min and lim indices.
- /// If this is an NA value, the min will be zero and the lim will be -1. For either an
- /// empty or NA value, the returned string may be null.
- ///
- public string GetRawUnderlyingBufferInfo(out int ichMin, out int ichLim)
- {
- ichMin = _ichMin;
- ichLim = ichMin + Length;
- return _outerBuffer;
- }
-
- ///
- /// This compares the two text values with NA propagation semantics.
- ///
- public static DvBool operator ==(DvText a, DvText b)
- {
- if (a.IsNA || b.IsNA)
- return DvBool.NA;
-
- if (a.Length != b.Length)
- return DvBool.False;
- for (int i = 0; i < a.Length; i++)
- {
- if (a._outerBuffer[a._ichMin + i] != b._outerBuffer[b._ichMin + i])
- return DvBool.False;
- }
- return DvBool.True;
- }
-
- ///
- /// This compares the two text values with NA propagation semantics.
- ///
- public static DvBool operator !=(DvText a, DvText b)
- {
- if (a.IsNA || b.IsNA)
- return DvBool.NA;
-
- if (a.Length != b.Length)
- return DvBool.True;
- for (int i = 0; i < a.Length; i++)
- {
- if (a._outerBuffer[a._ichMin + i] != b._outerBuffer[b._ichMin + i])
- return DvBool.True;
- }
- return DvBool.False;
- }
-
- public override int GetHashCode()
- {
- if (IsNA)
- return 0;
- return (int)Hash(42);
- }
-
- public override bool Equals(object obj)
- {
- if (obj is DvText)
- return Equals((DvText)obj);
- return false;
- }
-
- ///
- /// This implements IEquatable's Equals method. Returns true if both are NA.
- /// For NA propagating equality comparison, use the == operator.
- ///
- public bool Equals(DvText b)
- {
- if (Length != b.Length)
- return false;
- Contracts.Assert(HasChars == b.HasChars);
- for (int i = 0; i < Length; i++)
- {
- if (_outerBuffer[_ichMin + i] != b._outerBuffer[b._ichMin + i])
- return false;
- }
- return true;
- }
-
- ///
- /// Does not propagate NA values. Returns true if both are NA (same as a.Equals(b)).
- /// For NA propagating equality comparison, use the == operator.
- ///
- public static bool Identical(DvText a, DvText b)
- {
- if (a.Length != b.Length)
- return false;
- if (a.HasChars)
- {
- Contracts.Assert(b.HasChars);
- for (int i = 0; i < a.Length; i++)
- {
- if (a._outerBuffer[a._ichMin + i] != b._outerBuffer[b._ichMin + i])
- return false;
- }
- }
- return true;
- }
-
- ///
- /// Compare equality with the given system string value. Returns false if "this" is NA.
- ///
- public bool EqualsStr(string s)
- {
- Contracts.CheckValueOrNull(s);
-
- // Note that "NA" doesn't match any string.
- if (s == null)
- return Length == 0;
-
- if (s.Length != Length)
- return false;
- for (int i = 0; i < Length; i++)
- {
- if (s[i] != _outerBuffer[_ichMin + i])
- return false;
- }
- return true;
- }
-
- ///
- /// For implementation of . Uses code point comparison.
- /// Generally, this is not appropriate for sorting for presentation to a user.
- /// Sorts NA before everything else.
- ///
- public int CompareTo(DvText other)
- {
- if (IsNA)
- return other.IsNA ? 0 : -1;
- if (other.IsNA)
- return +1;
-
- int len = Math.Min(Length, other.Length);
- for (int ich = 0; ich < len; ich++)
- {
- char ch1 = _outerBuffer[_ichMin + ich];
- char ch2 = other._outerBuffer[other._ichMin + ich];
- if (ch1 != ch2)
- return ch1 < ch2 ? -1 : +1;
- }
- if (len < other.Length)
- return -1;
- if (len < Length)
- return +1;
- return 0;
- }
-
- ///
- /// Return a DvText consisting of characters from ich to the end of this DvText.
- ///
- public DvText SubSpan(int ich)
- {
- Contracts.CheckParam(0 <= ich & ich <= Length, nameof(ich));
- return new DvText(_outerBuffer, ich + _ichMin, IchLim);
- }
-
- ///
- /// Return a DvText consisting of the indicated range of characters.
- ///
- public DvText SubSpan(int ichMin, int ichLim)
- {
- Contracts.CheckParam(0 <= ichMin & ichMin <= Length, nameof(ichMin));
- Contracts.CheckParam(ichMin <= ichLim & ichLim <= Length, nameof(ichLim));
- return new DvText(_outerBuffer, ichMin + _ichMin, ichLim + _ichMin);
- }
-
- ///
- /// Return a non-null string corresponding to the characters in this DvText.
- /// Note that an empty string is returned for both Empty and NA.
- ///
- public override string ToString()
- {
- if (!HasChars)
- return "";
- Contracts.AssertNonEmpty(_outerBuffer);
- if (_ichMin == 0 && Length == _outerBuffer.Length)
- return _outerBuffer;
- return _outerBuffer.Substring(_ichMin, Length);
- }
-
- public string ToString(int ichMin)
- {
- Contracts.CheckParam(0 <= ichMin & ichMin <= Length, nameof(ichMin));
- if (ichMin == Length)
- return "";
- ichMin += _ichMin;
- if (ichMin == 0 && Length == _outerBuffer.Length)
- return _outerBuffer;
- return _outerBuffer.Substring(ichMin, IchLim - ichMin);
- }
-
- public IEnumerable Split(char[] separators)
- {
- Contracts.CheckValueOrNull(separators);
-
- if (!HasChars)
- yield break;
-
- if (separators == null || separators.Length == 0)
- {
- yield return this;
- yield break;
- }
-
- string text = _outerBuffer;
- int ichLim = IchLim;
- if (separators.Length == 1)
- {
- char chSep = separators[0];
- for (int ichCur = _ichMin; ; )
- {
- int ichMin = ichCur;
- for (; ; ichCur++)
- {
- Contracts.Assert(ichCur <= ichLim);
- if (ichCur >= ichLim)
- {
- yield return new DvText(text, ichMin, ichCur);
- yield break;
- }
- if (text[ichCur] == chSep)
- break;
- }
-
- yield return new DvText(text, ichMin, ichCur);
-
- // Skip the separator.
- ichCur++;
- }
- }
- else
- {
- for (int ichCur = _ichMin; ; )
- {
- int ichMin = ichCur;
- for (; ; ichCur++)
- {
- Contracts.Assert(ichCur <= ichLim);
- if (ichCur >= ichLim)
- {
- yield return new DvText(text, ichMin, ichCur);
- yield break;
- }
- // REVIEW: Can this be faster?
- if (ContainsChar(text[ichCur], separators))
- break;
- }
-
- yield return new DvText(text, ichMin, ichCur);
-
- // Skip the separator.
- ichCur++;
- }
- }
- }
-
- ///
- /// Splits this instance on the left-most occurrence of separator and produces the left
- /// and right values. If this instance does not contain the separator character,
- /// this returns false and sets to this instance and
- /// to the default value.
- ///
- public bool SplitOne(char separator, out DvText left, out DvText right)
- {
- if (!HasChars)
- {
- left = this;
- right = default(DvText);
- return false;
- }
-
- string text = _outerBuffer;
- int ichMin = _ichMin;
- int ichLim = IchLim;
-
- int ichCur = ichMin;
- for (; ; ichCur++)
- {
- Contracts.Assert(ichMin <= ichCur && ichCur <= ichLim);
- if (ichCur >= ichLim)
- {
- left = this;
- right = default(DvText);
- return false;
- }
- if (text[ichCur] == separator)
- break;
- }
-
- // Note that we don't use any fields of "this" here in case one
- // of the out parameters is the same as "this".
- left = new DvText(text, ichMin, ichCur);
- right = new DvText(text, ichCur + 1, ichLim);
- return true;
- }
-
- ///
- /// Splits this instance on the left-most occurrence of an element of separators character array and
- /// produces the left and right values. If this instance does not contain any of the
- /// characters in separators, thiss return false and initializes to this instance
- /// and to the default value.
- ///
- public bool SplitOne(char[] separators, out DvText left, out DvText right)
- {
- Contracts.CheckValueOrNull(separators);
-
- if (!HasChars || separators == null || separators.Length == 0)
- {
- left = this;
- right = default(DvText);
- return false;
- }
-
- string text = _outerBuffer;
- int ichMin = _ichMin;
- int ichLim = IchLim;
-
- int ichCur = ichMin;
- if (separators.Length == 1)
- {
- // Note: This duplicates code of the other SplitOne, but doing so improves perf because this is
- // used so heavily in instances parsing.
- char chSep = separators[0];
- for (; ; ichCur++)
- {
- Contracts.Assert(ichMin <= ichCur && ichCur <= ichLim);
- if (ichCur >= ichLim)
- {
- left = this;
- right = default(DvText);
- return false;
- }
- if (text[ichCur] == chSep)
- break;
- }
- }
- else
- {
- for (; ; ichCur++)
- {
- Contracts.Assert(ichMin <= ichCur && ichCur <= ichLim);
- if (ichCur >= ichLim)
- {
- left = this;
- right = default(DvText);
- return false;
- }
- // REVIEW: Can this be faster?
- if (ContainsChar(text[ichCur], separators))
- break;
- }
- }
-
- // Note that we don't use any fields of "this" here in case one
- // of the out parameters is the same as "this".
- left = new DvText(text, _ichMin, ichCur);
- right = new DvText(text, ichCur + 1, ichLim);
- return true;
- }
-
- ///
- /// Splits this instance on the right-most occurrence of separator and produces the left
- /// and right values. If this instance does not contain the separator character,
- /// this returns false and sets to this instance and
- /// to the default value.
- ///
- public bool SplitOneRight(char separator, out DvText left, out DvText right)
- {
- if (!HasChars)
- {
- left = this;
- right = default(DvText);
- return false;
- }
-
- string text = _outerBuffer;
- int ichMin = _ichMin;
- int ichLim = IchLim;
-
- int ichCur = ichLim;
- for (; ; )
- {
- Contracts.Assert(ichMin <= ichCur && ichCur <= ichLim);
- if (--ichCur < ichMin)
- {
- left = this;
- right = default(DvText);
- return false;
- }
- if (text[ichCur] == separator)
- break;
- }
-
- // Note that we don't use any fields of "this" here in case one
- // of the out parameters is the same as "this".
- left = new DvText(text, ichMin, ichCur);
- right = new DvText(text, ichCur + 1, ichLim);
- return true;
- }
-
- // REVIEW: Can this be faster?
- private static bool ContainsChar(char ch, char[] rgch)
- {
- Contracts.CheckNonEmpty(rgch, nameof(rgch));
-
- for (int i = 0; i < rgch.Length; i++)
- {
- if (rgch[i] == ch)
- return true;
- }
- return false;
- }
-
- ///
- /// Returns a text span with leading and trailing spaces trimmed. Note that this
- /// will remove only spaces, not any form of whitespace.
- ///
- public DvText Trim()
- {
- if (!HasChars)
- return this;
- int ichMin = _ichMin;
- int ichLim = IchLim;
- if (_outerBuffer[ichMin] != ' ' && _outerBuffer[ichLim - 1] != ' ')
- return this;
-
- while (ichMin < ichLim && _outerBuffer[ichMin] == ' ')
- ichMin++;
- while (ichMin < ichLim && _outerBuffer[ichLim - 1] == ' ')
- ichLim--;
- return new DvText(_outerBuffer, ichMin, ichLim);
- }
-
- ///
- /// Returns a text span with leading and trailing whitespace trimmed.
- ///
- public DvText TrimWhiteSpace()
- {
- if (!HasChars)
- return this;
- int ichMin = _ichMin;
- int ichLim = IchLim;
- if (!char.IsWhiteSpace(_outerBuffer[ichMin]) && !char.IsWhiteSpace(_outerBuffer[ichLim - 1]))
- return this;
-
- while (ichMin < ichLim && char.IsWhiteSpace(_outerBuffer[ichMin]))
- ichMin++;
- while (ichMin < ichLim && char.IsWhiteSpace(_outerBuffer[ichLim - 1]))
- ichLim--;
- return new DvText(_outerBuffer, ichMin, ichLim);
- }
-
- ///
- /// Returns a text span with trailing whitespace trimmed.
- ///
- public DvText TrimEndWhiteSpace()
- {
- if (!HasChars)
- return this;
-
- int ichLim = IchLim;
- if (!char.IsWhiteSpace(_outerBuffer[ichLim - 1]))
- return this;
-
- int ichMin = _ichMin;
- while (ichMin < ichLim && char.IsWhiteSpace(_outerBuffer[ichLim - 1]))
- ichLim--;
-
- return new DvText(_outerBuffer, ichMin, ichLim);
- }
-
- ///
- /// This produces zero for an empty string.
- ///
- public bool TryParse(out Single value)
- {
- if (IsNA)
- {
- value = Single.NaN;
- return true;
- }
- var res = DoubleParser.Parse(out value, _outerBuffer, _ichMin, IchLim);
- Contracts.Assert(res != DoubleParser.Result.Empty || value == 0);
- return res <= DoubleParser.Result.Empty;
- }
-
- ///
- /// This produces zero for an empty string.
- ///
- public bool TryParse(out Double value)
- {
- if (IsNA)
- {
- value = Double.NaN;
- return true;
- }
- var res = DoubleParser.Parse(out value, _outerBuffer, _ichMin, IchLim);
- Contracts.Assert(res != DoubleParser.Result.Empty || value == 0);
- return res <= DoubleParser.Result.Empty;
- }
-
- public uint Hash(uint seed)
- {
- Contracts.Check(!IsNA);
- return Hashing.MurmurHash(seed, _outerBuffer, _ichMin, IchLim);
- }
-
- // REVIEW: Add method to NormStr.Pool that deal with DvText instead of the other way around.
- public NormStr AddToPool(NormStr.Pool pool)
- {
- Contracts.Check(!IsNA);
- Contracts.CheckValue(pool, nameof(pool));
- return pool.Add(_outerBuffer, _ichMin, IchLim);
- }
-
- public NormStr FindInPool(NormStr.Pool pool)
- {
- Contracts.CheckValue(pool, nameof(pool));
- if (IsNA)
- return null;
- return pool.Get(_outerBuffer, _ichMin, IchLim);
- }
-
- public void AddToStringBuilder(StringBuilder sb)
- {
- Contracts.CheckValue(sb, nameof(sb));
- if (HasChars)
- sb.Append(_outerBuffer, _ichMin, Length);
- }
-
- public void AddLowerCaseToStringBuilder(StringBuilder sb)
- {
- Contracts.CheckValue(sb, nameof(sb));
- if (HasChars)
- {
- int min = _ichMin;
- int j;
- for (j = min; j < IchLim; j++)
- {
- char ch = CharUtils.ToLowerInvariant(_outerBuffer[j]);
- if (ch != _outerBuffer[j])
- {
- sb.Append(_outerBuffer, min, j - min).Append(ch);
- min = j + 1;
- }
- }
-
- Contracts.Assert(j == IchLim);
- if (min != j)
- sb.Append(_outerBuffer, min, j - min);
- }
- }
- }
-}
\ No newline at end of file
diff --git a/src/Microsoft.ML.Core/Data/MetadataUtils.cs b/src/Microsoft.ML.Core/Data/MetadataUtils.cs
index f7b91c3715..d952f57782 100644
--- a/src/Microsoft.ML.Core/Data/MetadataUtils.cs
+++ b/src/Microsoft.ML.Core/Data/MetadataUtils.cs
@@ -42,12 +42,13 @@ public static class Kinds
public const string ScoreColumnSetId = "ScoreColumnSetId";
///
- /// Metadata kind that indicates the prediction kind as a string. E.g. "BinaryClassification". The value is typically a DvText.
+ /// Metadata kind that indicates the prediction kind as a string. E.g. "BinaryClassification".
+ /// The value is typically a ReadOnlyMemory<char>.
///
public const string ScoreColumnKind = "ScoreColumnKind";
///
- /// Metadata kind that indicates the value kind of the score column as a string. E.g. "Score", "PredictedLabel", "Probability". The value is typically a DvText.
+ /// Metadata kind that indicates the value kind of the score column as a string. E.g. "Score", "PredictedLabel", "Probability". The value is typically a ReadOnlyMemory<char>.
///
public const string ScoreValueKind = "ScoreValueKind";
@@ -283,9 +284,9 @@ public static IEnumerable GetColumnSet(this ISchema schema, string metadata
var columnType = schema.GetMetadataTypeOrNull(metadataKind, col);
if (columnType != null && columnType.IsText)
{
- DvText val = default(DvText);
+ ReadOnlyMemory val = default;
schema.GetMetadata(metadataKind, col, ref val);
- if (val.EqualsStr(value))
+ if (ReadOnlyMemoryUtils.EqualsStr(value, val))
yield return col;
}
}
@@ -295,7 +296,7 @@ public static IEnumerable GetColumnSet(this ISchema schema, string metadata
/// Returns true if the specified column:
/// * is a vector of length N (including 0)
/// * has a SlotNames metadata
- /// * metadata type is VBuffer<DvText> of length N
+ /// * metadata type is VBuffer<ReadOnlyMemory<char>> of length N
///
public static bool HasSlotNames(this ISchema schema, int col, int vectorSize)
{
@@ -310,14 +311,14 @@ public static bool HasSlotNames(this ISchema schema, int col, int vectorSize)
&& type.ItemType.IsText;
}
- public static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.ColumnRole role, int vectorSize, ref VBuffer slotNames)
+ public static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.ColumnRole role, int vectorSize, ref VBuffer> slotNames)
{
Contracts.CheckValueOrNull(schema);
Contracts.CheckParam(vectorSize >= 0, nameof(vectorSize));
IReadOnlyList list;
if ((list = schema?.GetColumns(role)) == null || list.Count != 1 || !schema.Schema.HasSlotNames(list[0].Index, vectorSize))
- slotNames = new VBuffer(vectorSize, 0, slotNames.Values, slotNames.Indices);
+ slotNames = new VBuffer>(vectorSize, 0, slotNames.Values, slotNames.Indices);
else
schema.Schema.GetMetadata(Kinds.SlotNames, list[0].Index, ref slotNames);
}
@@ -343,12 +344,12 @@ public static bool HasKeyNames(this ISchema schema, int col, int keyCount)
/// The schema to query
/// Which column in the schema to query
/// True if and only if the column has the metadata
- /// set to the scalar value
+ /// set to the scalar value true
public static bool IsNormalized(this ISchema schema, int col)
{
Contracts.CheckValue(schema, nameof(schema));
- var value = default(DvBool);
- return schema.TryGetMetadata(BoolType.Instance, Kinds.IsNormalized, col, ref value) && value.IsTrue;
+ var value = default(bool);
+ return schema.TryGetMetadata(BoolType.Instance, Kinds.IsNormalized, col, ref value) && value;
}
///
@@ -436,9 +437,9 @@ public static bool TryGetCategoricalFeatureIndices(ISchema schema, int colIndex,
return isValid;
var type = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.CategoricalSlotRanges, colIndex);
- if (type?.RawType == typeof(VBuffer))
+ if (type?.RawType == typeof(VBuffer))
{
- VBuffer catIndices = default(VBuffer);
+ VBuffer catIndices = default(VBuffer);
schema.GetMetadata(MetadataUtils.Kinds.CategoricalSlotRanges, colIndex, ref catIndices);
VBufferUtils.Densify(ref catIndices);
int columnSlotsCount = schema.GetColumnType(colIndex).AsVector.VectorSizeCore;
@@ -448,19 +449,19 @@ public static bool TryGetCategoricalFeatureIndices(ISchema schema, int colIndex,
isValid = true;
for (int i = 0; i < catIndices.Values.Length; i += 2)
{
- if (catIndices.Values[i].RawValue > catIndices.Values[i + 1].RawValue ||
- catIndices.Values[i].RawValue <= previousEndIndex ||
- catIndices.Values[i].RawValue >= columnSlotsCount ||
- catIndices.Values[i + 1].RawValue >= columnSlotsCount)
+ if (catIndices.Values[i] > catIndices.Values[i + 1] ||
+ catIndices.Values[i] <= previousEndIndex ||
+ catIndices.Values[i] >= columnSlotsCount ||
+ catIndices.Values[i + 1] >= columnSlotsCount)
{
isValid = false;
break;
}
- previousEndIndex = catIndices.Values[i + 1].RawValue;
+ previousEndIndex = catIndices.Values[i + 1];
}
if (isValid)
- categoricalFeatures = catIndices.Values.Select(val => val.RawValue).ToArray();
+ categoricalFeatures = catIndices.Values.Select(val => val).ToArray();
}
}
diff --git a/src/Microsoft.ML.Core/Data/ReadOnlyMemoryUtils.cs b/src/Microsoft.ML.Core/Data/ReadOnlyMemoryUtils.cs
new file mode 100644
index 0000000000..4b207ab507
--- /dev/null
+++ b/src/Microsoft.ML.Core/Data/ReadOnlyMemoryUtils.cs
@@ -0,0 +1,318 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Runtime.Internal.Utilities;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace Microsoft.ML.Runtime.Data
+{
+    /// <summary>
+    /// Text helpers (comparison, splitting, trimming, pooling and <see cref="StringBuilder"/> appends)
+    /// for <see cref="ReadOnlyMemory{T}"/> and <see cref="ReadOnlySpan{T}"/> of <see cref="char"/>.
+    /// </summary>
+    public static class ReadOnlyMemoryUtils
+    {
+        /// <summary>
+        /// Compare equality with the given system string value.
+        /// </summary>
+        /// <param name="s">The string to compare against; may be null.</param>
+        /// <param name="memory">The characters to compare with <paramref name="s"/>.</param>
+        /// <returns>
+        /// True if both hold the same characters in the same order. A null <paramref name="s"/>
+        /// is equal only to an empty <paramref name="memory"/>.
+        /// </returns>
+        public static bool EqualsStr(string s, ReadOnlyMemory<char> memory)
+        {
+            Contracts.CheckValueOrNull(s);
+
+            if (s == null)
+                return memory.Length == 0;
+
+            if (s.Length != memory.Length)
+                return false;
+
+            return memory.Span.SequenceEqual(s.AsSpan());
+        }
+
+        /// <summary>
+        /// Lazily splits <paramref name="memory"/> at every occurrence of any character in
+        /// <paramref name="separators"/>. The separators themselves are not part of the results.
+        /// </summary>
+        /// <param name="memory">The characters to split.</param>
+        /// <param name="separators">The separator characters; may be null or empty.</param>
+        /// <returns>
+        /// Nothing when <paramref name="memory"/> is empty; <paramref name="memory"/> itself when no
+        /// separators are given; otherwise one slice per segment, including empty slices for
+        /// leading, trailing and adjacent separators.
+        /// </returns>
+        public static IEnumerable<ReadOnlyMemory<char>> Split(ReadOnlyMemory<char> memory, char[] separators)
+        {
+            Contracts.CheckValueOrNull(separators);
+
+            if (memory.IsEmpty)
+                yield break;
+
+            if (separators == null || separators.Length == 0)
+            {
+                yield return memory;
+                yield break;
+            }
+
+            // span always views the not-yet-split tail, memory.Slice(ichCur), so the indices
+            // returned by IndexOf/IndexOfAny are relative to ichCur.
+            var span = memory.Span;
+            if (separators.Length == 1)
+            {
+                char chSep = separators[0];
+                for (int ichCur = 0; ;)
+                {
+                    int nextSep = span.IndexOf(chSep);
+                    if (nextSep == -1)
+                    {
+                        yield return memory.Slice(ichCur);
+                        yield break;
+                    }
+
+                    yield return memory.Slice(ichCur, nextSep);
+
+                    // Skip the separator.
+                    ichCur += nextSep + 1;
+                    span = memory.Slice(ichCur).Span;
+                }
+            }
+            else
+            {
+                for (int ichCur = 0; ;)
+                {
+                    int nextSep = span.IndexOfAny(separators);
+                    if (nextSep == -1)
+                    {
+                        yield return memory.Slice(ichCur);
+                        yield break;
+                    }
+
+                    yield return memory.Slice(ichCur, nextSep);
+
+                    // Skip the separator.
+                    ichCur += nextSep + 1;
+                    span = memory.Slice(ichCur).Span;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Splits <paramref name="memory"/> on the left-most occurrence of <paramref name="separator"/> and
+        /// produces the left and right <see cref="ReadOnlyMemory{T}"/> of <see cref="char"/> values. If
+        /// <paramref name="memory"/> does not contain the separator character, this returns false and sets
+        /// <paramref name="left"/> to <paramref name="memory"/> and <paramref name="right"/> to the default
+        /// <see cref="ReadOnlyMemory{T}"/> of <see cref="char"/> value.
+        /// </summary>
+        public static bool SplitOne(ReadOnlyMemory<char> memory, char separator, out ReadOnlyMemory<char> left, out ReadOnlyMemory<char> right)
+        {
+            if (memory.IsEmpty)
+            {
+                left = memory;
+                right = default;
+                return false;
+            }
+
+            int index = memory.Span.IndexOf(separator);
+            if (index == -1)
+            {
+                left = memory;
+                right = default;
+                return false;
+            }
+
+            left = memory.Slice(0, index);
+            right = memory.Slice(index + 1, memory.Length - index - 1);
+            return true;
+        }
+
+        /// <summary>
+        /// Splits <paramref name="memory"/> on the left-most occurrence of an element of the
+        /// <paramref name="separators"/> character array and produces the left and right
+        /// <see cref="ReadOnlyMemory{T}"/> of <see cref="char"/> values. If <paramref name="memory"/> does not
+        /// contain any of the characters in separators (or separators is null or empty), this returns false and
+        /// sets <paramref name="left"/> to <paramref name="memory"/> and <paramref name="right"/> to the default
+        /// <see cref="ReadOnlyMemory{T}"/> of <see cref="char"/> value.
+        /// </summary>
+        public static bool SplitOne(ReadOnlyMemory<char> memory, char[] separators, out ReadOnlyMemory<char> left, out ReadOnlyMemory<char> right)
+        {
+            Contracts.CheckValueOrNull(separators);
+
+            if (memory.IsEmpty || separators == null || separators.Length == 0)
+            {
+                left = memory;
+                right = default;
+                return false;
+            }
+
+            int index;
+            if (separators.Length == 1)
+                index = memory.Span.IndexOf(separators[0]);
+            else
+                index = memory.Span.IndexOfAny(separators);
+
+            if (index == -1)
+            {
+                left = memory;
+                right = default;
+                return false;
+            }
+
+            left = memory.Slice(0, index);
+            right = memory.Slice(index + 1, memory.Length - index - 1);
+            return true;
+        }
+
+        /// <summary>
+        /// Returns a <see cref="ReadOnlyMemory{T}"/> of <see cref="char"/> with leading and trailing spaces
+        /// trimmed. Note that this will remove only spaces, not any form of whitespace.
+        /// </summary>
+        public static ReadOnlyMemory<char> TrimSpaces(ReadOnlyMemory<char> memory)
+        {
+            if (memory.IsEmpty)
+                return memory;
+
+            int ichLim = memory.Length;
+            int ichMin = 0;
+            var span = memory.Span;
+            // Fast path: neither end is a space, so the input is returned as is.
+            if (span[ichMin] != ' ' && span[ichLim - 1] != ' ')
+                return memory;
+
+            while (ichMin < ichLim && span[ichMin] == ' ')
+                ichMin++;
+            while (ichMin < ichLim && span[ichLim - 1] == ' ')
+                ichLim--;
+            return memory.Slice(ichMin, ichLim - ichMin);
+        }
+
+        /// <summary>
+        /// Returns a <see cref="ReadOnlyMemory{T}"/> of <see cref="char"/> with leading and trailing whitespace
+        /// trimmed. Whitespace is whatever <see cref="char.IsWhiteSpace(char)"/> reports as such.
+        /// </summary>
+        public static ReadOnlyMemory<char> TrimWhiteSpace(ReadOnlyMemory<char> memory)
+        {
+            if (memory.IsEmpty)
+                return memory;
+
+            int ichMin = 0;
+            int ichLim = memory.Length;
+            var span = memory.Span;
+            if (!char.IsWhiteSpace(span[ichMin]) && !char.IsWhiteSpace(span[ichLim - 1]))
+                return memory;
+
+            while (ichMin < ichLim && char.IsWhiteSpace(span[ichMin]))
+                ichMin++;
+            while (ichMin < ichLim && char.IsWhiteSpace(span[ichLim - 1]))
+                ichLim--;
+
+            return memory.Slice(ichMin, ichLim - ichMin);
+        }
+
+        /// <summary>
+        /// Returns a <see cref="ReadOnlyMemory{T}"/> of <see cref="char"/> with trailing whitespace trimmed.
+        /// Whitespace is whatever <see cref="char.IsWhiteSpace(char)"/> reports as such.
+        /// </summary>
+        public static ReadOnlyMemory<char> TrimEndWhiteSpace(ReadOnlyMemory<char> memory)
+        {
+            if (memory.IsEmpty)
+                return memory;
+
+            int ichLim = memory.Length;
+            var span = memory.Span;
+            if (!char.IsWhiteSpace(span[ichLim - 1]))
+                return memory;
+
+            while (0 < ichLim && char.IsWhiteSpace(span[ichLim - 1]))
+                ichLim--;
+
+            return memory.Slice(0, ichLim);
+        }
+
+        /// <summary>
+        /// Adds <paramref name="memory"/> to <paramref name="pool"/> by calling <c>pool.Add</c> and
+        /// returns the <see cref="NormStr"/> that call produces.
+        /// </summary>
+        public static NormStr AddToPool(ReadOnlyMemory<char> memory, NormStr.Pool pool)
+        {
+            Contracts.CheckValue(pool, nameof(pool));
+            return pool.Add(memory);
+        }
+
+        /// <summary>
+        /// Looks up <paramref name="memory"/> in <paramref name="pool"/> by calling <c>pool.Get</c>
+        /// (which does not add by default) and returns the result of that call.
+        /// </summary>
+        public static NormStr FindInPool(ReadOnlyMemory<char> memory, NormStr.Pool pool)
+        {
+            Contracts.CheckValue(pool, nameof(pool));
+            return pool.Get(memory);
+        }
+
+        /// <summary>
+        /// Appends <paramref name="span"/> to <paramref name="sb"/>, replacing every character by the
+        /// result of <c>CharUtils.ToLowerInvariant</c>. Runs of characters that the conversion leaves
+        /// unchanged are appended in bulk rather than one character at a time.
+        /// </summary>
+        public static void AddLowerCaseToStringBuilder(ReadOnlySpan<char> span, StringBuilder sb)
+        {
+            Contracts.CheckValue(sb, nameof(sb));
+
+            if (!span.IsEmpty)
+            {
+                // min is the start of the pending run of characters that need no conversion.
+                int min = 0;
+                int j;
+                for (j = min; j < span.Length; j++)
+                {
+                    char ch = CharUtils.ToLowerInvariant(span[j]);
+                    if (ch != span[j])
+                    {
+                        sb.AppendSpan(span.Slice(min, j - min)).Append(ch);
+                        min = j + 1;
+                    }
+                }
+
+                Contracts.Assert(j == span.Length);
+                if (min != j)
+                    sb.AppendSpan(span.Slice(min, j - min));
+            }
+        }
+
+        /// <summary>
+        /// Appends the characters of <paramref name="memory"/> to <paramref name="sb"/> and returns <paramref name="sb"/>.
+        /// </summary>
+        public static StringBuilder AppendMemory(this StringBuilder sb, ReadOnlyMemory<char> memory)
+        {
+            Contracts.CheckValue(sb, nameof(sb));
+            if (!memory.IsEmpty)
+                sb.AppendSpan(memory.Span);
+
+            return sb;
+        }
+
+        /// <summary>
+        /// Appends the characters of <paramref name="span"/> to <paramref name="sb"/> and returns <paramref name="sb"/>.
+        /// </summary>
+        public static StringBuilder AppendSpan(this StringBuilder sb, ReadOnlySpan<char> span)
+        {
+            unsafe
+            {
+                // Pin the span so its characters can be handed to StringBuilder.Append(char*, int).
+                fixed (char* valueChars = &MemoryMarshal.GetReference(span))
+                {
+                    sb.Append(valueChars, span.Length);
+                }
+            }
+
+            return sb;
+        }
+    }
+}
diff --git a/src/Microsoft.ML.Core/Data/TypeUtils.cs b/src/Microsoft.ML.Core/Data/TypeUtils.cs
index 30a9e4008b..c5d92f1ee7 100644
--- a/src/Microsoft.ML.Core/Data/TypeUtils.cs
+++ b/src/Microsoft.ML.Core/Data/TypeUtils.cs
@@ -2,102 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-using System;
-using System.Runtime.CompilerServices;
-
namespace Microsoft.ML.Runtime.Data
{
- using R4 = Single;
- using R8 = Double;
- using BL = DvBool;
- using TX = DvText;
-
public delegate bool RefPredicate(ref T value);
-
- ///
- /// Utilities for IDV standard types, including proper NA semantics.
- ///
- public static class TypeUtils
- {
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static bool IsNA(this R4 src) { return R4.IsNaN(src); }
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static bool IsNA(this R8 src) { return R8.IsNaN(src); }
-
- #region R4
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL Eq(this R4 a, R4 b)
- {
- return a == b ? BL.True : a.IsNA() || b.IsNA() ? BL.NA : BL.False;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL Ne(this R4 a, R4 b)
- {
- return a != b ? a.IsNA() || b.IsNA() ? BL.NA : BL.True : BL.False;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL Lt(this R4 a, R4 b)
- {
- return a < b ? BL.True : a >= b ? BL.False : BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL Le(this R4 a, R4 b)
- {
- return a <= b ? BL.True : a > b ? BL.False : BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL Ge(this R4 a, R4 b)
- {
- return a >= b ? BL.True : a < b ? BL.False : BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL Gt(this R4 a, R4 b)
- {
- return a > b ? BL.True : a <= b ? BL.False : BL.NA;
- }
- #endregion R4
-
- #region R8
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL Eq(this R8 a, R8 b)
- {
- return a == b ? BL.True : a.IsNA() || b.IsNA() ? BL.NA : BL.False;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL Ne(this R8 a, R8 b)
- {
- return a != b ? a.IsNA() || b.IsNA() ? BL.NA : BL.True : BL.False;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL Lt(this R8 a, R8 b)
- {
- return a < b ? BL.True : a >= b ? BL.False : BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL Le(this R8 a, R8 b)
- {
- return a <= b ? BL.True : a > b ? BL.False : BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL Ge(this R8 a, R8 b)
- {
- return a >= b ? BL.True : a < b ? BL.False : BL.NA;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static BL Gt(this R8 a, R8 b)
- {
- return a > b ? BL.True : a <= b ? BL.False : BL.NA;
- }
- #endregion R8
- }
}
diff --git a/src/Microsoft.ML.Core/Microsoft.ML.Core.csproj b/src/Microsoft.ML.Core/Microsoft.ML.Core.csproj
index c7bbd498d3..a326e4af34 100644
--- a/src/Microsoft.ML.Core/Microsoft.ML.Core.csproj
+++ b/src/Microsoft.ML.Core/Microsoft.ML.Core.csproj
@@ -11,4 +11,8 @@
+
+
+
+
diff --git a/src/Microsoft.ML.Core/Utilities/DoubleParser.cs b/src/Microsoft.ML.Core/Utilities/DoubleParser.cs
index 9ee245443b..a1a82d5218 100644
--- a/src/Microsoft.ML.Core/Utilities/DoubleParser.cs
+++ b/src/Microsoft.ML.Core/Utilities/DoubleParser.cs
@@ -70,25 +70,44 @@ public enum Result
Error = 3
}
- public static Result Parse(out Single value, string s, int ichMin, int ichLim)
+ ///
+ /// This produces zero for an empty string.
+ ///
+ public static bool TryParse(ReadOnlySpan span, out Single value)
+ {
+ var res = Parse(span, out value);
+ Contracts.Assert(res != Result.Empty || value == 0);
+ return res <= Result.Empty;
+ }
+
+ ///
+ /// This produces zero for an empty string.
+ ///
+ public static bool TryParse(ReadOnlySpan span, out Double value)
{
- Contracts.Assert(0 <= ichMin && ichMin <= ichLim && ichLim <= Utils.Size(s));
+ var res = Parse(span, out value);
+ Contracts.Assert(res != Result.Empty || value == 0);
+ return res <= Result.Empty;
+ }
- for (; ; ichMin++)
+ public static Result Parse(ReadOnlySpan span, out Single value)
+ {
+ int ich = 0;
+ for (; ; ich++)
{
- if (ichMin >= ichLim)
+ if (ich >= span.Length)
{
value = 0;
return Result.Empty;
}
- if (!char.IsWhiteSpace(s[ichMin]))
+ if (!char.IsWhiteSpace(span[ich]))
break;
}
// Handle the common case of a single digit or ?
- if (ichLim - ichMin == 1)
+ if (span.Length - ich == 1)
{
- char ch = s[ichMin];
+ char ch = span[ich];
if (ch >= '0' && ch <= '9')
{
value = ch - '0';
@@ -102,16 +121,16 @@ public static Result Parse(out Single value, string s, int ichMin, int ichLim)
}
int ichEnd;
- if (!DoubleParser.TryParse(out value, s, ichMin, ichLim, out ichEnd))
+ if (!DoubleParser.TryParse(span.Slice(ich, span.Length - ich), out value, out ichEnd))
{
value = default(Single);
return Result.Error;
}
// Make sure everything was consumed.
- while (ichEnd < ichLim)
+ while (ichEnd < span.Length)
{
- if (!char.IsWhiteSpace(s[ichEnd]))
+ if (!char.IsWhiteSpace(span[ichEnd]))
return Result.Extra;
ichEnd++;
}
@@ -119,25 +138,24 @@ public static Result Parse(out Single value, string s, int ichMin, int ichLim)
return Result.Good;
}
- public static Result Parse(out Double value, string s, int ichMin, int ichLim)
+ public static Result Parse(ReadOnlySpan span, out Double value)
{
- Contracts.Assert(0 <= ichMin && ichMin <= ichLim && ichLim <= Utils.Size(s));
-
- for (; ; ichMin++)
+ int ich = 0;
+ for (; ; ich++)
{
- if (ichMin >= ichLim)
+ if (ich >= span.Length)
{
value = 0;
return Result.Empty;
}
- if (!char.IsWhiteSpace(s[ichMin]))
+ if (!char.IsWhiteSpace(span[ich]))
break;
}
// Handle the common case of a single digit or ?
- if (ichLim - ichMin == 1)
+ if (span.Length - ich == 1)
{
- char ch = s[ichMin];
+ char ch = span[ich];
if (ch >= '0' && ch <= '9')
{
value = ch - '0';
@@ -151,16 +169,16 @@ public static Result Parse(out Double value, string s, int ichMin, int ichLim)
}
int ichEnd;
- if (!DoubleParser.TryParse(out value, s, ichMin, ichLim, out ichEnd))
+ if (!DoubleParser.TryParse(span.Slice(ich, span.Length - ich), out value, out ichEnd))
{
value = default(Double);
return Result.Error;
}
// Make sure everything was consumed.
- while (ichEnd < ichLim)
+ while (ichEnd < span.Length)
{
- if (!char.IsWhiteSpace(s[ichEnd]))
+ if (!char.IsWhiteSpace(span[ichEnd]))
return Result.Extra;
ichEnd++;
}
@@ -168,15 +186,15 @@ public static Result Parse(out Double value, string s, int ichMin, int ichLim)
return Result.Good;
}
- public static bool TryParse(out Single value, string s, int ichMin, int ichLim, out int ichEnd)
+ public static bool TryParse(ReadOnlySpan span, out Single value, out int ichEnd)
{
bool neg = false;
ulong num = 0;
long exp = 0;
- ichEnd = ichMin;
- if (!TryParseCore(s, ref ichEnd, ichLim, ref neg, ref num, ref exp))
- return TryParseSpecial(out value, s, ref ichEnd, ichLim);
+ ichEnd = 0;
+ if (!TryParseCore(span, ref ichEnd, ref neg, ref num, ref exp))
+ return TryParseSpecial(span, ref ichEnd, out value);
if (num == 0)
{
@@ -231,7 +249,7 @@ public static bool TryParse(out Single value, string s, int ichMin, int ichLim,
#if COMPARE_BCL
if (!_failed)
{
- string str = s.Substring(ichMin, ichEnd - ichMin);
+ string str = span.ToString();
Single x;
if (!Single.TryParse(str, out x))
{
@@ -257,15 +275,15 @@ public static bool TryParse(out Single value, string s, int ichMin, int ichLim,
return true;
}
- public static bool TryParse(out Double value, string s, int ichMin, int ichLim, out int ichEnd)
+ public static bool TryParse(ReadOnlySpan span, out Double value, out int ichEnd)
{
bool neg = false;
ulong num = 0;
long exp = 0;
- ichEnd = ichMin;
- if (!TryParseCore(s, ref ichEnd, ichLim, ref neg, ref num, ref exp))
- return TryParseSpecial(out value, s, ref ichEnd, ichLim);
+ ichEnd = 0;
+ if (!TryParseCore(span, ref ichEnd, ref neg, ref num, ref exp))
+ return TryParseSpecial(span, ref ichEnd, out value);
if (num == 0)
{
@@ -413,7 +431,7 @@ public static bool TryParse(out Double value, string s, int ichMin, int ichLim,
value = -value;
#if COMPARE_BCL
- string str = s.Substring(ichMin, ichEnd - ichMin);
+ string str = span.ToString();
Double x;
if (!Double.TryParse(str, out x))
{
@@ -440,19 +458,19 @@ public static bool TryParse(out Double value, string s, int ichMin, int ichLim,
return true;
}
- private static bool TryParseSpecial(out Double value, string s, ref int ich, int ichLim)
+ private static bool TryParseSpecial(ReadOnlySpan span, ref int ich, out Double value)
{
Single tmp;
- bool res = TryParseSpecial(out tmp, s, ref ich, ichLim);
+ bool res = TryParseSpecial(span, ref ich, out tmp);
value = tmp;
return res;
}
- private static bool TryParseSpecial(out Single value, string s, ref int ich, int ichLim)
+ private static bool TryParseSpecial(ReadOnlySpan span, ref int ich, out Single value)
{
- if (ich < ichLim)
+ if (ich < span.Length)
{
- switch (s[ich])
+ switch (span[ich])
{
case '?':
// We also interpret ? to mean NaN.
@@ -461,7 +479,7 @@ private static bool TryParseSpecial(out Single value, string s, ref int ich, int
return true;
case 'N':
- if (ich + 3 <= ichLim && s[ich + 1] == 'a' && s[ich + 2] == 'N')
+ if (ich + 3 <= span.Length && span[ich + 1] == 'a' && span[ich + 2] == 'N')
{
value = Single.NaN;
ich += 3;
@@ -470,7 +488,7 @@ private static bool TryParseSpecial(out Single value, string s, ref int ich, int
break;
case 'I':
- if (ich + 8 <= ichLim && s[ich + 1] == 'n' && s[ich + 2] == 'f' && s[ich + 3] == 'i' && s[ich + 4] == 'n' && s[ich + 5] == 'i' && s[ich + 6] == 't' && s[ich + 7] == 'y')
+ if (ich + 8 <= span.Length && span[ich + 1] == 'n' && span[ich + 2] == 'f' && span[ich + 3] == 'i' && span[ich + 4] == 'n' && span[ich + 5] == 'i' && span[ich + 6] == 't' && span[ich + 7] == 'y')
{
value = Single.PositiveInfinity;
ich += 8;
@@ -479,14 +497,14 @@ private static bool TryParseSpecial(out Single value, string s, ref int ich, int
break;
case '-':
- if (ich + 2 <= ichLim && s[ich + 1] == InfinitySymbol)
+ if (ich + 2 <= span.Length && span[ich + 1] == InfinitySymbol)
{
value = Single.NegativeInfinity;
ich += 2;
return true;
}
- if (ich + 9 <= ichLim && s[ich + 1] == 'I' && s[ich + 2] == 'n' && s[ich + 3] == 'f' && s[ich + 4] == 'i' && s[ich + 5] == 'n' && s[ich + 6] == 'i' && s[ich + 7] == 't' && s[ich + 8] == 'y')
+ if (ich + 9 <= span.Length && span[ich + 1] == 'I' && span[ich + 2] == 'n' && span[ich + 3] == 'f' && span[ich + 4] == 'i' && span[ich + 5] == 'n' && span[ich + 6] == 'i' && span[ich + 7] == 't' && span[ich + 8] == 'y')
{
value = Single.NegativeInfinity;
ich += 9;
@@ -505,15 +523,14 @@ private static bool TryParseSpecial(out Single value, string s, ref int ich, int
return false;
}
- private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg, ref ulong num, ref long exp)
+ private static bool TryParseCore(ReadOnlySpan span, ref int ich, ref bool neg, ref ulong num, ref long exp)
{
- Contracts.AssertValue(s);
- Contracts.Assert(0 <= ich & ich <= ichLim & ichLim <= s.Length);
+ Contracts.Assert(0 <= ich & ich <= span.Length);
Contracts.Assert(!neg);
Contracts.Assert(num == 0);
Contracts.Assert(exp == 0);
- if (ich >= ichLim)
+ if (ich >= span.Length)
return false;
// If num gets bigger than this, we don't process additional digits.
@@ -524,19 +541,19 @@ private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg
// Get started: handle sign
int i = ich;
- switch (s[i])
+ switch (span[i])
{
default:
return false;
case '-':
- if (++i >= ichLim)
+ if (++i >= span.Length)
return false;
neg = true;
break;
case '+':
- if (++i >= ichLim)
+ if (++i >= span.Length)
return false;
break;
@@ -561,8 +578,8 @@ private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg
uint d;
for (; ; )
{
- Contracts.Assert(i < ichLim);
- if ((d = (uint)s[i] - '0') > 9)
+ Contracts.Assert(i < span.Length);
+ if ((d = (uint)span[i] - '0') > 9)
break;
digits = true;
@@ -571,33 +588,33 @@ private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg
else
exp++;
- if (++i >= ichLim)
+ if (++i >= span.Length)
{
ich = i;
return true;
}
}
- Contracts.Assert(i < ichLim);
+ Contracts.Assert(i < span.Length);
- if (s[i] != '.')
+ if (span[i] != '.')
goto LAfterDigits;
LPoint:
- Contracts.Assert(i < ichLim);
- Contracts.Assert(s[i] == '.');
+ Contracts.Assert(i < span.Length);
+ Contracts.Assert(span[i] == '.');
// Get the digits after '.'
for (; ; )
{
- if (++i >= ichLim)
+ if (++i >= span.Length)
{
if (digits)
ich = i;
return digits;
}
- Contracts.Assert(i < ichLim);
- if ((d = (uint)s[i] - '0') > 9)
+ Contracts.Assert(i < span.Length);
+ if ((d = (uint)span[i] - '0') > 9)
break;
digits = true;
@@ -609,7 +626,7 @@ private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg
}
LAfterDigits:
- Contracts.Assert(i < ichLim);
+ Contracts.Assert(i < span.Length);
if (!digits)
return false;
@@ -617,30 +634,30 @@ private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg
ich = i;
// Check for an exponent.
- switch (s[i])
+ switch (span[i])
{
default:
return true;
case 'e':
case 'E':
- if (++i >= ichLim)
+ if (++i >= span.Length)
return true;
break;
}
// Handle the exponent sign.
bool expNeg = false;
- Contracts.Assert(i < ichLim);
- switch (s[i])
+ Contracts.Assert(i < span.Length);
+ switch (span[i])
{
case '-':
- if (++i >= ichLim)
+ if (++i >= span.Length)
return true;
expNeg = true;
break;
case '+':
- if (++i >= ichLim)
+ if (++i >= span.Length)
return true;
break;
}
@@ -656,14 +673,14 @@ private static bool TryParseCore(string s, ref int ich, int ichLim, ref bool neg
long e = 0;
for (; ; )
{
- Contracts.Assert(i < ichLim);
- if ((d = (uint)s[i] - '0') > 9)
+ Contracts.Assert(i < span.Length);
+ if ((d = (uint)span[i] - '0') > 9)
break;
digits = true;
if (e < eMax)
e = 10 * e + (int)d;
- if (++i >= ichLim)
+ if (++i >= span.Length)
break;
}
diff --git a/src/Microsoft.ML.Core/Utilities/Hashing.cs b/src/Microsoft.ML.Core/Utilities/Hashing.cs
index 5812937d72..a15677451b 100644
--- a/src/Microsoft.ML.Core/Utilities/Hashing.cs
+++ b/src/Microsoft.ML.Core/Utilities/Hashing.cs
@@ -11,6 +11,8 @@ namespace Microsoft.ML.Runtime.Internal.Utilities
{
public static class Hashing
{
+ private const uint _defaultSeed = (5381 << 16) + 5381;
+
public static uint CombineHash(uint u1, uint u2)
{
return ((u1 << 7) | (u1 >> 25)) ^ u2;
@@ -62,24 +64,10 @@ public static int HashInt(int n)
}
///
- /// Hash the characters in a string. This MUST produce the same result as the other
- /// overloads (with equivalent characters).
- ///
- public static uint HashString(string str)
- {
- Contracts.AssertValue(str);
- return MurmurHash((5381 << 16) + 5381, str, 0, str.Length);
- }
-
- ///
- /// Hash the characters in a sub-string. This MUST produce the same result
- /// as HashString(str.SubString(ichMin, ichLim - ichMin)).
+ /// Hash the characters in a <see cref="ReadOnlySpan{T}"/> of <see cref="char"/>.
+ /// This MUST produce the same result as the other overloads (with equivalent characters).
///
- public static uint HashString(string str, int ichMin, int ichLim)
- {
- Contracts.Assert(0 <= ichMin & ichMin <= ichLim & ichLim <= Utils.Size(str));
- return MurmurHash((5381 << 16) + 5381, str, ichMin, ichLim);
- }
+ public static uint HashString(ReadOnlySpan str) => MurmurHash(_defaultSeed, str);
///
/// Hash the characters in a string builder. This MUST produce the same result
@@ -88,12 +76,12 @@ public static uint HashString(string str, int ichMin, int ichLim)
public static uint HashString(StringBuilder sb)
{
Contracts.AssertValue(sb);
- return MurmurHash((5381 << 16) + 5381, sb, 0, sb.Length);
+ return MurmurHash(_defaultSeed, sb, 0, sb.Length);
}
public static uint HashSequence(uint[] sequence, int min, int lim)
{
- return MurmurHash((5381 << 16) + 5381, sequence, min, lim);
+ return MurmurHash(_defaultSeed, sequence, min, lim);
}
///
@@ -125,23 +113,21 @@ public static uint MurmurRound(uint hash, uint chunk)
/// * 0x0800 to 0xFFFF : 1110xxxx 10xxxxxx 10xxxxxx
/// NOTE: This MUST match the StringBuilder version below.
///
- public static uint MurmurHash(uint hash, string data, int ichMin, int ichLim, bool toUpper = false)
+ public static uint MurmurHash(uint hash, ReadOnlySpan span, bool toUpper = false)
{
- Contracts.Assert(0 <= ichMin & ichMin <= ichLim & ichLim <= Utils.Size(data));
-
// Byte length (in pseudo UTF-8 form).
int len = 0;
// Current bits, value and count.
ulong cur = 0;
int bits = 0;
- for (int ich = ichMin; ich < ichLim; ich++)
+ for (int ich = 0; ich < span.Length; ich++)
{
Contracts.Assert((bits & 0x7) == 0);
Contracts.Assert((uint)bits <= 24);
Contracts.Assert(cur <= 0x00FFFFFF);
- uint ch = toUpper ? char.ToUpperInvariant(data[ich]) : data[ich];
+ uint ch = toUpper ? char.ToUpperInvariant(span[ich]) : span[ich];
if (ch <= 0x007F)
{
cur |= ch << bits;
@@ -256,7 +242,7 @@ public static uint MurmurHash(uint hash, StringBuilder data, int ichMin, int ich
// Final mixing ritual for the hash.
hash = MixHash(hash);
- Contracts.Assert(hash == MurmurHash(seed, data.ToString(), 0, data.Length));
+ Contracts.Assert(hash == MurmurHash(seed, data.ToString().AsSpan()));
return hash;
}
diff --git a/src/Microsoft.ML.Core/Utilities/NormStr.cs b/src/Microsoft.ML.Core/Utilities/NormStr.cs
index 50b72196ed..fea018ac58 100644
--- a/src/Microsoft.ML.Core/Utilities/NormStr.cs
+++ b/src/Microsoft.ML.Core/Utilities/NormStr.cs
@@ -8,6 +8,7 @@
using System.Linq;
using System.Threading;
using System.Text;
+using Microsoft.ML.Runtime.Data;
namespace Microsoft.ML.Runtime.Internal.Utilities
{
@@ -18,37 +19,26 @@ namespace Microsoft.ML.Runtime.Internal.Utilities
///
public sealed class NormStr
{
- public readonly string Value;
+ public readonly ReadOnlyMemory Value;
public readonly int Id;
private readonly uint _hash;
///
/// NormStr's can only be created by the Pool.
///
- private NormStr(string str, int id, uint hash)
+ private NormStr(ReadOnlyMemory str, int id, uint hash)
{
- Contracts.AssertValue(str);
- Contracts.Assert(id >= 0 || id == -1 && str == "");
+ Contracts.Assert(id >= 0 || id == -1 && str.IsEmpty);
Value = str;
Id = id;
_hash = hash;
}
- public override string ToString()
- {
- return Value;
- }
-
public override int GetHashCode()
{
return (int)_hash;
}
- public static implicit operator string(NormStr nstr)
- {
- return nstr.Value;
- }
-
public sealed class Pool : IEnumerable
{
private int _mask; // Number of buckets minus 1. The number of buckets must be a power of two.
@@ -107,7 +97,8 @@ public NormStr Get(string str, bool add = false)
if (str == null)
str = "";
- uint hash = Hashing.HashString(str);
+ var strSpan = str.AsSpan();
+ uint hash = Hashing.HashString(strSpan);
int ins = GetIns(hash);
while (ins >= 0)
{
@@ -115,75 +106,50 @@ public NormStr Get(string str, bool add = false)
if ((int)Utils.GetLo(meta) == str.Length)
{
var ns = GetNs(ins);
- if (ns.Value == str)
+ if (strSpan.SequenceEqual(ns.Value.Span))
return ns;
}
ins = (int)Utils.GetHi(meta);
}
Contracts.Assert(ins == -1);
- return add ? AddCore(str, hash) : null;
+ return add ? AddCore(str.AsMemory(), hash) : null;
}
- ///
- /// Make sure the given string has an equivalent NormStr in the pool and return it.
- ///
- public NormStr Add(string str)
- {
- return Get(str, true);
- }
-
- ///
- /// Determine if the given sub-string has an equivalent NormStr in the pool.
- ///
- public NormStr Get(string str, int ichMin, int ichLim, bool add = false)
+ public NormStr Get(ReadOnlyMemory str, bool add = false)
{
AssertValid();
- Contracts.Assert(0 <= ichMin & ichMin <= ichLim & ichLim <= Utils.Size(str));
- if (str == null)
- return Get("", add);
-
- if (ichMin == 0 && ichLim == str.Length)
- return Get(str, add);
-
- uint hash = Hashing.HashString(str, ichMin, ichLim);
+ var span = str.Span;
+ uint hash = Hashing.HashString(span);
int ins = GetIns(hash);
- if (ins >= 0)
+ while (ins >= 0)
{
- int cch = ichLim - ichMin;
- var rgmeta = _rgmeta;
- for (; ; )
+ ulong meta = _rgmeta[ins];
+ if ((int)Utils.GetLo(meta) == str.Length)
{
- ulong meta = rgmeta[ins];
- if ((int)Utils.GetLo(meta) == cch)
- {
- var ns = GetNs(ins);
- var value = ns.Value;
- for (int ich = 0; ; ich++)
- {
- if (ich == cch)
- return ns;
- if (value[ich] != str[ich + ichMin])
- break;
- }
- }
- ins = (int)Utils.GetHi(meta);
- if (ins < 0)
- break;
+ var ns = GetNs(ins);
+ if (ns.Value.Span.SequenceEqual(span))
+ return ns;
}
+ ins = (int)Utils.GetHi(meta);
}
Contracts.Assert(ins == -1);
- return add ? AddCore(str.Substring(ichMin, ichLim - ichMin), hash) : null;
+ return add ? AddCore(str, hash) : null;
}
///
- /// Make sure the given sub-string has an equivalent NormStr in the pool and return it.
+ /// Make sure the given string has an equivalent NormStr in the pool and return it.
///
- public NormStr Add(string str, int ichMin, int ichLim)
+ public NormStr Add(string str)
{
- return Get(str, ichMin, ichLim, true);
+ return Get(str, true);
+ }
+
+ public NormStr Add(ReadOnlyMemory str)
+ {
+ return Get(str, true);
}
///
@@ -212,7 +178,7 @@ public NormStr Get(StringBuilder sb, bool add = false)
{
if (ich == cch)
return ns;
- if (value[ich] != sb[ich])
+ if (value.Span[ich] != sb[ich])
break;
}
}
@@ -220,7 +186,7 @@ public NormStr Get(StringBuilder sb, bool add = false)
}
Contracts.Assert(ins == -1);
- return add ? AddCore(sb.ToString(), hash) : null;
+ return add ? AddCore(sb.ToString().AsMemory(), hash) : null;
}
///
@@ -234,11 +200,10 @@ public NormStr Add(StringBuilder sb)
///
/// Adds the item. Does NOT check for whether the item is already present.
///
- private NormStr AddCore(string str, uint hash)
+ private NormStr AddCore(ReadOnlyMemory str, uint hash)
{
- Contracts.AssertValue(str);
Contracts.Assert(str.Length >= 0);
- Contracts.Assert(Hashing.HashString(str) == hash);
+ Contracts.Assert(Hashing.HashString(str.Span) == hash);
if (_rgns == null)
{
diff --git a/src/Microsoft.ML.Core/Utilities/Stream.cs b/src/Microsoft.ML.Core/Utilities/Stream.cs
index 5e2974e2a5..7fbf0148b9 100644
--- a/src/Microsoft.ML.Core/Utilities/Stream.cs
+++ b/src/Microsoft.ML.Core/Utilities/Stream.cs
@@ -2,8 +2,6 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-using Float = System.Single;
-
using System;
using System.Collections;
using System.Collections.Generic;
@@ -178,7 +176,7 @@ public static void WriteBytesNoCount(this BinaryWriter writer, byte[] values, in
///
/// Writes a length prefixed array of Floats.
///
- public static void WriteFloatArray(this BinaryWriter writer, Float[] values)
+ public static void WriteFloatArray(this BinaryWriter writer, float[] values)
{
Contracts.AssertValue(writer);
Contracts.AssertValueOrNull(values);
@@ -197,7 +195,7 @@ public static void WriteFloatArray(this BinaryWriter writer, Float[] values)
///
/// Writes a length prefixed array of Floats.
///
- public static void WriteFloatArray(this BinaryWriter writer, Float[] values, int count)
+ public static void WriteFloatArray(this BinaryWriter writer, float[] values, int count)
{
Contracts.AssertValue(writer);
Contracts.AssertValueOrNull(values);
@@ -211,7 +209,7 @@ public static void WriteFloatArray(this BinaryWriter writer, Float[] values, int
///
/// Writes a specified number of floats starting at the specified index from an array.
///
- public static void WriteFloatArray(this BinaryWriter writer, Float[] values, int start, int count)
+ public static void WriteFloatArray(this BinaryWriter writer, float[] values, int start, int count)
{
Contracts.AssertValue(writer);
Contracts.AssertValue(values);
@@ -225,7 +223,7 @@ public static void WriteFloatArray(this BinaryWriter writer, Float[] values, int
///
/// Writes a length prefixed array of Floats.
///
- public static void WriteFloatArray(this BinaryWriter writer, IEnumerable values, int count)
+ public static void WriteFloatArray(this BinaryWriter writer, IEnumerable values, int count)
{
Contracts.AssertValue(writer);
Contracts.AssertValue(values);
@@ -244,7 +242,7 @@ public static void WriteFloatArray(this BinaryWriter writer, IEnumerable
///
/// Writes an array of Floats without the length prefix.
///
- public static void WriteFloatsNoCount(this BinaryWriter writer, Float[] values, int count)
+ public static void WriteFloatsNoCount(this BinaryWriter writer, float[] values, int count)
{
Contracts.AssertValue(writer);
Contracts.AssertValueOrNull(values);
@@ -257,7 +255,7 @@ public static void WriteFloatsNoCount(this BinaryWriter writer, Float[] values,
///
/// Writes a length prefixed array of singles.
///
- public static void WriteSingleArray(this BinaryWriter writer, Single[] values)
+ public static void WriteSingleArray(this BinaryWriter writer, float[] values)
{
Contracts.AssertValue(writer);
Contracts.AssertValueOrNull(values);
@@ -276,7 +274,7 @@ public static void WriteSingleArray(this BinaryWriter writer, Single[] values)
///
/// Writes a length prefixed array of singles.
///
- public static void WriteSingleArray(this BinaryWriter writer, Single[] values, int count)
+ public static void WriteSingleArray(this BinaryWriter writer, float[] values, int count)
{
Contracts.AssertValue(writer);
Contracts.AssertValueOrNull(values);
@@ -290,7 +288,7 @@ public static void WriteSingleArray(this BinaryWriter writer, Single[] values, i
///
/// Writes an array of singles without the length prefix.
///
- public static void WriteSinglesNoCount(this BinaryWriter writer, Single[] values, int count)
+ public static void WriteSinglesNoCount(this BinaryWriter writer, float[] values, int count)
{
Contracts.AssertValue(writer);
Contracts.AssertValueOrNull(values);
@@ -303,7 +301,7 @@ public static void WriteSinglesNoCount(this BinaryWriter writer, Single[] values
///
/// Writes a length prefixed array of doubles.
///
- public static void WriteDoubleArray(this BinaryWriter writer, Double[] values)
+ public static void WriteDoubleArray(this BinaryWriter writer, double[] values)
{
Contracts.AssertValue(writer);
Contracts.AssertValueOrNull(values);
@@ -315,14 +313,14 @@ public static void WriteDoubleArray(this BinaryWriter writer, Double[] values)
}
writer.Write(values.Length);
- foreach (Double val in values)
+ foreach (double val in values)
writer.Write(val);
}
///
/// Writes a length prefixed array of doubles.
///
- public static void WriteDoubleArray(this BinaryWriter writer, Double[] values, int count)
+ public static void WriteDoubleArray(this BinaryWriter writer, double[] values, int count)
{
Contracts.AssertValue(writer);
Contracts.AssertValueOrNull(values);
@@ -336,7 +334,7 @@ public static void WriteDoubleArray(this BinaryWriter writer, Double[] values, i
///
/// Writes an array of doubles without the length prefix.
///
- public static void WriteDoublesNoCount(this BinaryWriter writer, Double[] values, int count)
+ public static void WriteDoublesNoCount(this BinaryWriter writer, double[] values, int count)
{
Contracts.AssertValue(writer);
Contracts.AssertValueOrNull(values);
@@ -427,7 +425,7 @@ public static void WriteBitArray(this BinaryWriter writer, BitArray arr)
}
}
- public static long WriteSByteStream(this BinaryWriter writer, IEnumerable e)
+ public static long WriteSByteStream(this BinaryWriter writer, IEnumerable e)
{
long c = 0;
foreach (var v in e)
@@ -438,7 +436,7 @@ public static long WriteSByteStream(this BinaryWriter writer, IEnumerable
return c;
}
- public static long WriteByteStream(this BinaryWriter writer, IEnumerable e)
+ public static long WriteByteStream(this BinaryWriter writer, IEnumerable e)
{
long c = 0;
foreach (var v in e)
@@ -449,7 +447,7 @@ public static long WriteByteStream(this BinaryWriter writer, IEnumerable e
return c;
}
- public static long WriteIntStream(this BinaryWriter writer, IEnumerable e)
+ public static long WriteIntStream(this BinaryWriter writer, IEnumerable e)
{
long c = 0;
foreach (var v in e)
@@ -460,7 +458,7 @@ public static long WriteIntStream(this BinaryWriter writer, IEnumerable e
return c;
}
- public static long WriteUIntStream(this BinaryWriter writer, IEnumerable e)
+ public static long WriteUIntStream(this BinaryWriter writer, IEnumerable e)
{
long c = 0;
foreach (var v in e)
@@ -471,7 +469,7 @@ public static long WriteUIntStream(this BinaryWriter writer, IEnumerable
return c;
}
- public static long WriteShortStream(this BinaryWriter writer, IEnumerable e)
+ public static long WriteShortStream(this BinaryWriter writer, IEnumerable e)
{
long c = 0;
foreach (var v in e)
@@ -482,7 +480,7 @@ public static long WriteShortStream(this BinaryWriter writer, IEnumerable
return c;
}
- public static long WriteUShortStream(this BinaryWriter writer, IEnumerable e)
+ public static long WriteUShortStream(this BinaryWriter writer, IEnumerable e)
{
long c = 0;
foreach (var v in e)
@@ -493,7 +491,7 @@ public static long WriteUShortStream(this BinaryWriter writer, IEnumerable e)
+ public static long WriteLongStream(this BinaryWriter writer, IEnumerable e)
{
long c = 0;
foreach (var v in e)
@@ -504,7 +502,7 @@ public static long WriteLongStream(this BinaryWriter writer, IEnumerable
return c;
}
- public static long WriteULongStream(this BinaryWriter writer, IEnumerable e)
+ public static long WriteULongStream(this BinaryWriter writer, IEnumerable e)
{
long c = 0;
foreach (var v in e)
@@ -515,7 +513,7 @@ public static long WriteULongStream(this BinaryWriter writer, IEnumerable e)
+ public static long WriteSingleStream(this BinaryWriter writer, IEnumerable e)
{
long c = 0;
foreach (var v in e)
@@ -526,7 +524,7 @@ public static long WriteSingleStream(this BinaryWriter writer, IEnumerable e)
+ public static long WriteDoubleStream(this BinaryWriter writer, IEnumerable e)
{
long c = 0;
foreach (var v in e)
@@ -606,12 +604,12 @@ public static bool ReadBoolByte(this BinaryReader reader)
return b != 0;
}
- public static Float ReadFloat(this BinaryReader reader)
+ public static float ReadFloat(this BinaryReader reader)
{
return reader.ReadSingle();
}
- public static Float[] ReadFloatArray(this BinaryReader reader)
+ public static float[] ReadFloatArray(this BinaryReader reader)
{
Contracts.AssertValue(reader);
@@ -620,16 +618,16 @@ public static Float[] ReadFloatArray(this BinaryReader reader)
return ReadFloatArray(reader, size);
}
- public static Float[] ReadFloatArray(this BinaryReader reader, int size)
+ public static float[] ReadFloatArray(this BinaryReader reader, int size)
{
Contracts.AssertValue(reader);
Contracts.Assert(size >= 0);
if (size == 0)
return null;
- var values = new Float[size];
+ var values = new float[size];
- long bufferSizeInBytes = (long)size * sizeof(Float);
+ long bufferSizeInBytes = (long)size * sizeof(float);
if (bufferSizeInBytes < _bulkReadThresholdInBytes)
{
for (int i = 0; i < size; i++)
@@ -649,14 +647,14 @@ public static Float[] ReadFloatArray(this BinaryReader reader, int size)
return values;
}
- public static void ReadFloatArray(this BinaryReader reader, Float[] array, int start, int count)
+ public static void ReadFloatArray(this BinaryReader reader, float[] array, int start, int count)
{
Contracts.AssertValue(reader);
Contracts.AssertValue(array);
Contracts.Assert(0 <= start && start < array.Length);
Contracts.Assert(0 < count && count <= array.Length - start);
- long bufferReadLengthInBytes = (long)count * sizeof(Float);
+ long bufferReadLengthInBytes = (long)count * sizeof(float);
if (bufferReadLengthInBytes < _bulkReadThresholdInBytes)
{
for (int i = 0; i < count; i++)
@@ -668,15 +666,15 @@ public static void ReadFloatArray(this BinaryReader reader, Float[] array, int s
{
fixed (void* dst = array)
{
- long bufferBeginOffsetInBytes = (long)start * sizeof(Float);
- long bufferSizeInBytes = ((long)array.Length - start) * sizeof(Float);
+ long bufferBeginOffsetInBytes = (long)start * sizeof(float);
+ long bufferSizeInBytes = ((long)array.Length - start) * sizeof(float);
ReadBytes(reader, (byte*)dst + bufferBeginOffsetInBytes, bufferSizeInBytes, bufferReadLengthInBytes);
}
}
}
}
- public static Single[] ReadSingleArray(this BinaryReader reader)
+ public static float[] ReadSingleArray(this BinaryReader reader)
{
Contracts.AssertValue(reader);
int size = reader.ReadInt32();
@@ -684,15 +682,15 @@ public static Single[] ReadSingleArray(this BinaryReader reader)
return ReadSingleArray(reader, size);
}
- public static Single[] ReadSingleArray(this BinaryReader reader, int size)
+ public static float[] ReadSingleArray(this BinaryReader reader, int size)
{
Contracts.AssertValue(reader);
Contracts.Assert(size >= 0);
if (size == 0)
return null;
- var values = new Single[size];
+ var values = new float[size];
- long bufferSizeInBytes = (long)size * sizeof(Single);
+ long bufferSizeInBytes = (long)size * sizeof(float);
if (bufferSizeInBytes < _bulkReadThresholdInBytes)
{
for (int i = 0; i < size; i++)
@@ -712,7 +710,7 @@ public static Single[] ReadSingleArray(this BinaryReader reader, int size)
return values;
}
- public static Double[] ReadDoubleArray(this BinaryReader reader)
+ public static double[] ReadDoubleArray(this BinaryReader reader)
{
Contracts.AssertValue(reader);
@@ -721,15 +719,15 @@ public static Double[] ReadDoubleArray(this BinaryReader reader)
return ReadDoubleArray(reader, size);
}
- public static Double[] ReadDoubleArray(this BinaryReader reader, int size)
+ public static double[] ReadDoubleArray(this BinaryReader reader, int size)
{
Contracts.AssertValue(reader);
Contracts.Assert(size >= 0);
if (size == 0)
return null;
- var values = new Double[size];
+ var values = new double[size];
- long bufferSizeInBytes = (long)size * sizeof(Double);
+ long bufferSizeInBytes = (long)size * sizeof(double);
if (bufferSizeInBytes < _bulkReadThresholdInBytes)
{
for (int i = 0; i < size; i++)
diff --git a/src/Microsoft.ML.Data/Commands/ScoreCommand.cs b/src/Microsoft.ML.Data/Commands/ScoreCommand.cs
index 610c4ee25f..301603b14a 100644
--- a/src/Microsoft.ML.Data/Commands/ScoreCommand.cs
+++ b/src/Microsoft.ML.Data/Commands/ScoreCommand.cs
@@ -301,10 +301,10 @@ public static TScorerFactory GetScorerComponent(
Contracts.AssertValue(mapper);
ComponentCatalog.LoadableClassInfo info = null;
- DvText scoreKind = default;
+ ReadOnlyMemory scoreKind = default;
if (mapper.OutputSchema.ColumnCount > 0 &&
mapper.OutputSchema.TryGetMetadata(TextType.Instance, MetadataUtils.Kinds.ScoreColumnKind, 0, ref scoreKind) &&
- scoreKind.HasChars)
+ !scoreKind.IsEmpty)
{
var loadName = scoreKind.ToString();
info = ComponentCatalog.GetLoadableClassInfo(loadName);
diff --git a/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs b/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs
index 305dadd4f2..46bb704b6f 100644
--- a/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs
+++ b/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs
@@ -132,7 +132,7 @@ private static void PrintSchema(TextWriter writer, Arguments args, ISchema schem
var itw = IndentingTextWriter.Wrap(writer);
using (itw.Nest())
{
- var names = default(VBuffer);
+ var names = default(VBuffer>);
for (int col = 0; col < colLim; col++)
{
var name = schema.GetColumnName(col);
@@ -171,7 +171,7 @@ private static void PrintSchema(TextWriter writer, Arguments args, ISchema schem
bool verbose = args.Verbose ?? false;
foreach (var kvp in names.Items(all: verbose))
{
- if (verbose || kvp.Value.HasChars)
+ if (verbose || !kvp.Value.IsEmpty)
itw.WriteLine("{0}:{1}", kvp.Key, kvp.Value);
}
}
diff --git a/src/Microsoft.ML.Data/Commands/TrainCommand.cs b/src/Microsoft.ML.Data/Commands/TrainCommand.cs
index 431681ec2a..acc03b743f 100644
--- a/src/Microsoft.ML.Data/Commands/TrainCommand.cs
+++ b/src/Microsoft.ML.Data/Commands/TrainCommand.cs
@@ -443,7 +443,6 @@ public static bool AddNormalizerIfNeeded(IHostEnvironment env, IChannel ch, ITra
{
if (autoNorm != NormalizeOption.Yes)
{
- DvBool isNormalized = DvBool.False;
if (!trainer.Info.NeedNormalization || schema.IsNormalized(featCol))
{
ch.Info("Not adding a normalizer.");
diff --git a/src/Microsoft.ML.Data/Data/BufferBuilder.cs b/src/Microsoft.ML.Data/Data/BufferBuilder.cs
index 1c0e7cde08..b5f20eac5a 100644
--- a/src/Microsoft.ML.Data/Data/BufferBuilder.cs
+++ b/src/Microsoft.ML.Data/Data/BufferBuilder.cs
@@ -89,8 +89,8 @@ private void AssertValid()
public static BufferBuilder CreateDefault()
{
- if (typeof(T) == typeof(DvText))
- return (BufferBuilder)(object)new BufferBuilder(TextCombiner.Instance);
+ if (typeof(T) == typeof(ReadOnlyMemory))
+ return (BufferBuilder)(object)new BufferBuilder>(TextCombiner.Instance);
if (typeof(T) == typeof(float))
return (BufferBuilder)(object)new BufferBuilder(FloatAdder.Instance);
throw Contracts.Except($"Unrecognized type '{typeof(T)}' for default {nameof(BufferBuilder)}");
diff --git a/src/Microsoft.ML.Data/Data/Combiner.cs b/src/Microsoft.ML.Data/Data/Combiner.cs
index ee45aee3e3..6335620b8b 100644
--- a/src/Microsoft.ML.Data/Data/Combiner.cs
+++ b/src/Microsoft.ML.Data/Data/Combiner.cs
@@ -19,7 +19,7 @@ public abstract class Combiner
public abstract void Combine(ref T dst, T src);
}
- public sealed class TextCombiner : Combiner
+ public sealed class TextCombiner : Combiner>
{
private static volatile TextCombiner _instance;
public static TextCombiner Instance
@@ -36,8 +36,8 @@ private TextCombiner()
{
}
- public override bool IsDefault(DvText value) { return value.Length == 0; }
- public override void Combine(ref DvText dst, DvText src)
+ public override bool IsDefault(ReadOnlyMemory value) { return value.Length == 0; }
+ public override void Combine(ref ReadOnlyMemory dst, ReadOnlyMemory src)
{
Contracts.Check(IsDefault(dst));
dst = src;
diff --git a/src/Microsoft.ML.Data/Data/Conversion.cs b/src/Microsoft.ML.Data/Data/Conversion.cs
index 0a9833064a..1f08d63fc3 100644
--- a/src/Microsoft.ML.Data/Data/Conversion.cs
+++ b/src/Microsoft.ML.Data/Data/Conversion.cs
@@ -14,22 +14,18 @@
namespace Microsoft.ML.Runtime.Data.Conversion
{
- using BL = DvBool;
- using DT = DvDateTime;
- using DZ = DvDateTimeZone;
- using I1 = DvInt1;
- using I2 = DvInt2;
- using I4 = DvInt4;
- using I8 = DvInt8;
+ using BL = Boolean;
+ using DT = DateTime;
+ using DZ = DateTimeOffset;
using R4 = Single;
using R8 = Double;
- using RawI1 = SByte;
- using RawI2 = Int16;
- using RawI4 = Int32;
- using RawI8 = Int64;
+ using I1 = SByte;
+ using I2 = Int16;
+ using I4 = Int32;
+ using I8 = Int64;
using SB = StringBuilder;
- using TS = DvTimeSpan;
- using TX = DvText;
+ using TX = ReadOnlyMemory;
+ using TS = TimeSpan;
using U1 = Byte;
using U2 = UInt16;
using U4 = UInt32;
@@ -244,41 +240,14 @@ private Conversions()
AddStd(Convert);
AddAux(Convert);
- AddIsNA(IsNA);
- AddIsNA(IsNA);
- AddIsNA(IsNA);
- AddIsNA(IsNA);
AddIsNA(IsNA);
AddIsNA(IsNA);
- AddIsNA(IsNA);
- AddIsNA(IsNA);
- AddIsNA(IsNA);
- AddIsNA(IsNA);
- AddIsNA(IsNA);
-
- AddGetNA(GetNA);
- AddGetNA(GetNA);
- AddGetNA(GetNA);
- AddGetNA(GetNA);
+
AddGetNA(GetNA);
AddGetNA(GetNA);
- AddGetNA(GetNA);
- AddGetNA(GetNA);
- AddGetNA(GetNA);
- AddGetNA(GetNA);
- AddGetNA(GetNA);
-
- AddHasNA(HasNA);
- AddHasNA(HasNA);
- AddHasNA(HasNA);
- AddHasNA(HasNA);
+
AddHasNA(HasNA);
AddHasNA(HasNA);
- AddHasNA(HasNA);
- AddHasNA(HasNA);
- AddHasNA(HasNA);
- AddHasNA(HasNA);
- AddHasNA(HasNA);
AddIsDef(IsDefault);
AddIsDef(IsDefault);
@@ -533,7 +502,7 @@ public bool TryGetStringConversion(ColumnType type, out ValueMapper(out ValueMapper conv)
{
DataKind kindSrc;
- if (!_kinds.TryGetValue(typeof (TSrc), out kindSrc))
+ if (!_kinds.TryGetValue(typeof(TSrc), out kindSrc))
{
conv = null;
return false;
@@ -846,42 +815,24 @@ public ValueGetter GetNAOrDefaultGetter(ColumnType type)
// The IsNA methods are for efficient delegates (instance instead of static).
#region IsNA
- private bool IsNA(ref I1 src) => src.IsNA;
- private bool IsNA(ref I2 src) => src.IsNA;
- private bool IsNA(ref I4 src) => src.IsNA;
- private bool IsNA(ref I8 src) => src.IsNA;
- private bool IsNA(ref R4 src) => src.IsNA();
- private bool IsNA(ref R8 src) => src.IsNA();
- private bool IsNA(ref BL src) => src.IsNA;
- private bool IsNA(ref TS src) => src.IsNA;
- private bool IsNA(ref DT src) => src.IsNA;
- private bool IsNA(ref DZ src) => src.IsNA;
- private bool IsNA(ref TX src) => src.IsNA;
+ private bool IsNA(ref R4 src) => R4.IsNaN(src);
+ private bool IsNA(ref R8 src) => R8.IsNaN(src);
#endregion IsNA
#region HasNA
- private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; }
- private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; }
- private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; }
- private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; }
- private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA()) return true; } return false; }
- private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA()) return true; } return false; }
- private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; }
- private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; }
- private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; }
- private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; }
- private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; }
+ private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (R4.IsNaN(src.Values[i])) return true; } return false; }
+ private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (R8.IsNaN(src.Values[i])) return true; } return false; }
#endregion HasNA
#region IsDefault
- private bool IsDefault(ref I1 src) => src.RawValue == 0;
- private bool IsDefault(ref I2 src) => src.RawValue == 0;
- private bool IsDefault(ref I4 src) => src.RawValue == 0;
- private bool IsDefault(ref I8 src) => src.RawValue == 0;
+ private bool IsDefault(ref I1 src) => src == default(I1);
+ private bool IsDefault(ref I2 src) => src == default(I2);
+ private bool IsDefault(ref I4 src) => src == default(I4);
+ private bool IsDefault(ref I8 src) => src == default(I8);
private bool IsDefault(ref R4 src) => src == 0;
private bool IsDefault(ref R8 src) => src == 0;
private bool IsDefault(ref TX src) => src.IsEmpty;
- private bool IsDefault(ref BL src) => src.IsFalse;
+ private bool IsDefault(ref BL src) => src == default;
private bool IsDefault(ref U1 src) => src == 0;
private bool IsDefault(ref U2 src) => src == 0;
private bool IsDefault(ref U4 src) => src == 0;
@@ -900,17 +851,8 @@ public ValueGetter GetNAOrDefaultGetter(ColumnType type)
#endregion HasZero
#region GetNA
- private void GetNA(ref I1 value) => value = I1.NA;
- private void GetNA(ref I2 value) => value = I2.NA;
- private void GetNA(ref I4 value) => value = I4.NA;
- private void GetNA(ref I8 value) => value = I8.NA;
private void GetNA(ref R4 value) => value = R4.NaN;
private void GetNA(ref R8 value) => value = R8.NaN;
- private void GetNA(ref BL value) => value = BL.NA;
- private void GetNA(ref TS value) => value = TS.NA;
- private void GetNA(ref DT value) => value = DT.NA;
- private void GetNA(ref DZ value) => value = DZ.NA;
- private void GetNA(ref TX value) => value = TX.NA;
#endregion GetNA
#region ToI1
@@ -1022,28 +964,28 @@ public ValueGetter GetNAOrDefaultGetter(ColumnType type)
#endregion ToR8
#region ToStringBuilder
- public void Convert(ref I1 src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.Append(src.RawValue); }
- public void Convert(ref I2 src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.Append(src.RawValue); }
- public void Convert(ref I4 src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.Append(src.RawValue); }
- public void Convert(ref I8 src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.Append(src.RawValue); }
+ public void Convert(ref I1 src, ref SB dst) { ClearDst(ref dst); dst.Append(src); }
+ public void Convert(ref I2 src, ref SB dst) { ClearDst(ref dst); dst.Append(src); }
+ public void Convert(ref I4 src, ref SB dst) { ClearDst(ref dst); dst.Append(src); }
+ public void Convert(ref I8 src, ref SB dst) { ClearDst(ref dst); dst.Append(src); }
public void Convert(ref U1 src, ref SB dst) => ClearDst(ref dst).Append(src);
public void Convert(ref U2 src, ref SB dst) => ClearDst(ref dst).Append(src);
public void Convert(ref U4 src, ref SB dst) => ClearDst(ref dst).Append(src);
public void Convert(ref U8 src, ref SB dst) => ClearDst(ref dst).Append(src);
public void Convert(ref UG src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("0x{0:x16}{1:x16}", src.Hi, src.Lo); }
- public void Convert(ref R4 src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA()) dst.AppendFormat(CultureInfo.InvariantCulture, "{0:R}", src); }
- public void Convert(ref R8 src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA()) dst.AppendFormat(CultureInfo.InvariantCulture, "{0:G17}", src); }
+ public void Convert(ref R4 src, ref SB dst) { ClearDst(ref dst); if (R4.IsNaN(src)) dst.AppendFormat(CultureInfo.InvariantCulture, "{0}", "?"); else dst.AppendFormat(CultureInfo.InvariantCulture, "{0:R}", src); }
+ public void Convert(ref R8 src, ref SB dst) { ClearDst(ref dst); if (R8.IsNaN(src)) dst.AppendFormat(CultureInfo.InvariantCulture, "{0}", "?"); else dst.AppendFormat(CultureInfo.InvariantCulture, "{0:G17}", src); }
public void Convert(ref BL src, ref SB dst)
{
ClearDst(ref dst);
- if (src.IsFalse)
+ if (!src)
dst.Append("0");
- else if (src.IsTrue)
+ else
dst.Append("1");
}
- public void Convert(ref TS src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.AppendFormat("{0:c}", (TimeSpan)src); }
- public void Convert(ref DT src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.AppendFormat("{0:o}", (DateTime)src); }
- public void Convert(ref DZ src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.AppendFormat("{0:o}", (DateTimeOffset)src); }
+ public void Convert(ref TS src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("{0:c}", src); }
+ public void Convert(ref DT src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("{0:o}", src); }
+ public void Convert(ref DZ src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("{0:o}", src); }
#endregion ToStringBuilder
#region FromR4
@@ -1108,16 +1050,13 @@ public bool TryParse(ref TX src, out U4 dst)
///
public bool TryParse(ref TX src, out U8 dst)
{
- if (src.IsNA)
+ if (src.IsEmpty)
{
dst = 0;
return false;
}
- int ichMin;
- int ichLim;
- string text = src.GetRawUnderlyingBufferInfo(out ichMin, out ichLim);
- return TryParseCore(text, ichMin, ichLim, out dst);
+ return TryParseCore(src.Span, out dst);
}
///
@@ -1130,16 +1069,15 @@ public bool TryParse(ref TX src, out U8 dst)
/// and had only digits and the letters 'a' through 'f' or 'A' through 'F' as characters
public bool TryParse(ref TX src, out UG dst)
{
+ var span = src.Span;
// REVIEW: Accomodate numeric inputs?
- if (src.Length != 34 || src[0] != '0' || (src[1] != 'x' && src[1] != 'X'))
+ if (src.Length != 34 || span[0] != '0' || (span[1] != 'x' && span[1] != 'X'))
{
dst = default(UG);
return false;
}
- int ichMin;
- int ichLim;
- string tx = src.GetRawUnderlyingBufferInfo(out ichMin, out ichLim);
- int offset = ichMin + 2;
+
+ int offset = 2;
ulong hi = 0;
ulong num = 0;
for (int i = 0; i < 2; ++i)
@@ -1147,7 +1085,7 @@ public bool TryParse(ref TX src, out UG dst)
for (int d = 0; d < 16; ++d)
{
num <<= 4;
- char c = tx[offset++];
+ char c = span[offset++];
// REVIEW: An exhaustive switch statement *might* be faster, maybe, at the
// cost of being significantly longer.
if ('0' <= c && c <= '9')
@@ -1168,7 +1106,7 @@ public bool TryParse(ref TX src, out UG dst)
num = 0;
}
}
- Contracts.Assert(offset == ichLim);
+ Contracts.Assert(offset == src.Length);
// The first read bits are the higher order bits, so they are listed second here.
dst = new UG(num, hi);
return true;
@@ -1181,44 +1119,44 @@ public bool TryParse(ref TX src, out UG dst)
/// The standard representations are any casing of:
/// ? NaN NA N/A
///
- private bool IsStdMissing(ref TX src)
+ private bool IsStdMissing(ref ReadOnlySpan span)
{
- Contracts.Assert(src.HasChars);
+ Contracts.Assert(!span.IsEmpty);
char ch;
- switch (src.Length)
+ switch (span.Length)
{
- default:
- return false;
-
- case 1:
- if (src[0] == '?')
- return true;
- return false;
- case 2:
- if ((ch = src[0]) != 'N' && ch != 'n')
- return false;
- if ((ch = src[1]) != 'A' && ch != 'a')
+ default:
return false;
- return true;
- case 3:
- if ((ch = src[0]) != 'N' && ch != 'n')
+
+ case 1:
+ if (span[0] == '?')
+ return true;
return false;
- if ((ch = src[1]) == '/')
- {
- // Check for N/A.
- if ((ch = src[2]) != 'A' && ch != 'a')
+ case 2:
+ if ((ch = span[0]) != 'N' && ch != 'n')
return false;
- }
- else
- {
- // Check for NaN.
- if (ch != 'a' && ch != 'A')
+ if ((ch = span[1]) != 'A' && ch != 'a')
return false;
- if ((ch = src[2]) != 'N' && ch != 'n')
+ return true;
+ case 3:
+ if ((ch = span[0]) != 'N' && ch != 'n')
return false;
- }
- return true;
+ if ((ch = span[1]) == '/')
+ {
+ // Check for N/A.
+ if ((ch = span[2]) != 'A' && ch != 'a')
+ return false;
+ }
+ else
+ {
+ // Check for NaN.
+ if (ch != 'a' && ch != 'A')
+ return false;
+ if ((ch = span[2]) != 'N' && ch != 'n')
+ return false;
+ }
+ return true;
}
}
@@ -1226,11 +1164,13 @@ private bool IsStdMissing(ref TX src)
/// Utility to assist in parsing key-type values. The min and max values define
/// the legal input value bounds. The output dst value is "normalized" so min is
/// mapped to 1, max is mapped to 1 + (max - min).
- /// Missing values are mapped to zero with a true return.
+ /// An exception is thrown for missing values.
/// Unparsable or out of range values are mapped to zero with a false return.
///
public bool TryParseKey(ref TX src, U8 min, U8 max, out U8 dst)
{
+ var span = src.Span;
+ Contracts.Check(!IsStdMissing(ref span), "Missing text value cannot be converted to unsigned integer type.");
Contracts.Assert(min <= max);
// This simply ensures we don't have min == 0 and max == U8.MaxValue. This is illegal since
@@ -1240,22 +1180,19 @@ public bool TryParseKey(ref TX src, U8 min, U8 max, out U8 dst)
// Both empty and missing map to zero (NA for key values) and that mapping is valid,
// hence the true return.
- if (!src.HasChars)
+ if (src.IsEmpty)
{
dst = 0;
return true;
}
// Parse a ulong.
- int ichMin;
- int ichLim;
- string text = src.GetRawUnderlyingBufferInfo(out ichMin, out ichLim);
ulong uu;
- if (!TryParseCore(text, ichMin, ichLim, out uu))
+ if (!TryParseCore(span, out uu))
{
dst = 0;
// Return true only for standard forms for NA.
- return IsStdMissing(ref src);
+ return false;
}
if (min > uu || uu > max)
@@ -1268,14 +1205,13 @@ public bool TryParseKey(ref TX src, U8 min, U8 max, out U8 dst)
return true;
}
- private bool TryParseCore(string text, int ich, int lim, out ulong dst)
+ private bool TryParseCore(ReadOnlySpan span, out ulong dst)
{
- Contracts.Assert(0 <= ich && ich <= lim && lim <= Utils.Size(text));
-
ulong res = 0;
- while (ich < lim)
+ int ich = 0;
+ while (ich < span.Length)
{
- uint d = (uint)text[ich++] - (uint)'0';
+ uint d = (uint)span[ich++] - (uint)'0';
if (d >= 10)
goto LFail;
@@ -1301,71 +1237,70 @@ private bool TryParseCore(string text, int ich, int lim, out ulong dst)
///
/// This produces zero for empty. It returns false if the text is not parsable or overflows.
- /// On failure, it sets dst to the NA value.
+ /// On failure, it sets dst to the default value.
///
public bool TryParse(ref TX src, out I1 dst)
{
- long res;
- bool f = TryParseSigned(RawI1.MaxValue, ref src, out res);
- Contracts.Assert(f || res == I1.RawNA);
- Contracts.Assert((RawI1)res == res);
- dst = (RawI1)res;
- return f;
+ dst = default;
+ TryParseSigned(I1.MaxValue, ref src, out long? res);
+ Contracts.Check(res.HasValue, "Value could not be parsed from text to sbyte.");
+ Contracts.Check((I1)res == res, "Overflow or underflow occured while converting value in text to sbyte.");
+ dst = (I1)res;
+ return true;
}
///
/// This produces zero for empty. It returns false if the text is not parsable or overflows.
- /// On failure, it sets dst to the NA value.
+ /// On failure, it sets dst to the default value.
///
public bool TryParse(ref TX src, out I2 dst)
{
- long res;
- bool f = TryParseSigned(RawI2.MaxValue, ref src, out res);
- Contracts.Assert(f || res == I2.RawNA);
- Contracts.Assert((RawI2)res == res);
- dst = (RawI2)res;
- return f;
+ dst = default;
+ TryParseSigned(I2.MaxValue, ref src, out long? res);
+ Contracts.Check(res.HasValue, "Value could not be parsed from text to short.");
+ Contracts.Check((I2)res == res, "Overflow or underflow occured while converting value in text to short.");
+ dst = (I2)res;
+ return true;
}
///
/// This produces zero for empty. It returns false if the text is not parsable or overflows.
- /// On failure, it sets dst to the NA value.
+ /// On failure, it sets dst to the default value.
///
public bool TryParse(ref TX src, out I4 dst)
{
- long res;
- bool f = TryParseSigned(RawI4.MaxValue, ref src, out res);
- Contracts.Assert(f || res == I4.RawNA);
- Contracts.Assert((RawI4)res == res);
- dst = (RawI4)res;
- return f;
+ dst = default;
+ TryParseSigned(I4.MaxValue, ref src, out long? res);
+ Contracts.Check(res.HasValue, "Value could not be parsed from text to int32.");
+ Contracts.Check((I4)res == res, "Overflow or underflow occured while converting value in text to int.");
+ dst = (I4)res;
+ return true;
}
///
/// This produces zero for empty. It returns false if the text is not parsable or overflows.
- /// On failure, it sets dst to the NA value.
+ /// On failure, it sets dst to the default value.
///
public bool TryParse(ref TX src, out I8 dst)
{
- long res;
- bool f = TryParseSigned(RawI8.MaxValue, ref src, out res);
- Contracts.Assert(f || res == I8.RawNA);
- dst = res;
- return f;
+ dst = default;
+ TryParseSigned(I8.MaxValue, ref src, out long? res);
+ Contracts.Check(res.HasValue, "Value could not be parsed from text to long.");
+ dst = (I8)res;
+ return true;
}
///
/// Returns false if the text is not parsable as an non-negative long or overflows.
///
- private bool TryParseNonNegative(string text, int ich, int lim, out long result)
+ private bool TryParseNonNegative(ReadOnlySpan span, out long result)
{
- Contracts.Assert(0 <= ich && ich <= lim && lim <= Utils.Size(text));
-
long res = 0;
- while (ich < lim)
+ int ich = 0;
+ while (ich < span.Length)
{
Contracts.Assert(res >= 0);
- uint d = (uint)text[ich++] - (uint)'0';
+ uint d = (uint)span[ich++] - (uint)'0';
if (d >= 10)
goto LFail;
@@ -1389,61 +1324,53 @@ private bool TryParseNonNegative(string text, int ich, int lim, out long result)
///
/// This produces zero for empty. It returns false if the text is not parsable as a signed integer
- /// or the result overflows. The min legal value is -max. The NA value is -max - 1.
+ /// or the result overflows. The min legal value is -max - 1. The NA value is null.
/// When it returns false, result is set to the NA value. The result can be NA on true return,
/// since some representations of NA are not considered parse failure.
///
- private bool TryParseSigned(long max, ref TX span, out long result)
+ private void TryParseSigned(long max, ref TX text, out long? result)
{
Contracts.Assert(max > 0);
Contracts.Assert((max & (max + 1)) == 0);
- if (!span.HasChars)
+ if (text.IsEmpty)
{
- if (span.IsNA)
- result = -max - 1;
- else
- result = 0;
- return true;
+ result = default(long);
+ return;
}
- int ichMin;
- int ichLim;
- string text = span.GetRawUnderlyingBufferInfo(out ichMin, out ichLim);
-
- long val;
+ ulong val;
+ var span = text.Span;
if (span[0] == '-')
{
- if (span.Length == 1 ||
- !TryParseNonNegative(text, ichMin + 1, ichLim, out val) ||
- val > max)
+ if (span.Length == 1 || !TryParseCore(span.Slice(1), out val) || (val > ((ulong)max + 1)))
{
- result = -max - 1;
- return false;
+ result = null;
+ return;
}
Contracts.Assert(val >= 0);
result = -(long)val;
- Contracts.Assert(long.MinValue < result && result <= 0);
- return true;
+ Contracts.Assert(long.MinValue <= result && result <= 0);
+ return;
}
- if (!TryParseNonNegative(text, ichMin, ichLim, out val))
+ long sVal;
+ if (!TryParseNonNegative(span, out sVal))
{
- // Check for acceptable NA forms: ? NaN NA and N/A.
- result = -max - 1;
- return IsStdMissing(ref span);
+ result = null;
+ return;
}
- Contracts.Assert(val >= 0);
- if (val > max)
+ Contracts.Assert(sVal >= 0);
+ if (sVal > max)
{
- result = -max - 1;
- return false;
+ result = null;
+ return;
}
- result = (long)val;
+ result = (long)sVal;
Contracts.Assert(0 <= result && result <= long.MaxValue);
- return true;
+ return;
}
///
@@ -1452,10 +1379,11 @@ private bool TryParseSigned(long max, ref TX span, out long result)
///
public bool TryParse(ref TX src, out R4 dst)
{
- if (src.TryParse(out dst))
+ var span = src.Span;
+ if (DoubleParser.TryParse(span, out dst))
return true;
dst = R4.NaN;
- return IsStdMissing(ref src);
+ return IsStdMissing(ref span);
}
///
@@ -1464,108 +1392,90 @@ public bool TryParse(ref TX src, out R4 dst)
///
public bool TryParse(ref TX src, out R8 dst)
{
- if (src.TryParse(out dst))
+ var span = src.Span;
+ if (DoubleParser.TryParse(span, out dst))
return true;
dst = R8.NaN;
- return IsStdMissing(ref src);
+ return IsStdMissing(ref span);
}
public bool TryParse(ref TX src, out TS dst)
{
- if (!src.HasChars)
+ if (src.IsEmpty)
{
- if (src.IsNA)
- dst = TS.NA;
- else
- dst = default(TS);
+ dst = default;
return true;
}
- TimeSpan res;
- if (TimeSpan.TryParse(src.ToString(), CultureInfo.InvariantCulture, out res))
- {
- dst = new TS(res);
+
+ if (TimeSpan.TryParse(src.ToString(), CultureInfo.InvariantCulture, out dst))
return true;
- }
- dst = TS.NA;
- return IsStdMissing(ref src);
+ var span = src.Span;
+ Contracts.Check(!IsStdMissing(ref span), "Missing values cannot be converted to boolean value.");
+ return true;
}
public bool TryParse(ref TX src, out DT dst)
{
- if (!src.HasChars)
+ if (src.IsEmpty)
{
- if (src.IsNA)
- dst = DvDateTime.NA;
- else
- dst = default(DvDateTime);
+ dst = default;
return true;
}
- DateTime res;
- if (DateTime.TryParse(src.ToString(), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out res))
- {
- dst = new DT(res);
+
+ if (DateTime.TryParse(src.ToString(), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out dst))
return true;
- }
- dst = DvDateTime.NA;
- return IsStdMissing(ref src);
+
+ var span = src.Span;
+ Contracts.Check(!IsStdMissing(ref span), "Missing values cannot be converted to boolean value.");
+ return true;
}
public bool TryParse(ref TX src, out DZ dst)
{
- if (!src.HasChars)
+ if (src.IsEmpty)
{
- if (src.IsNA)
- dst = DvDateTimeZone.NA;
- else
- dst = default(DvDateTimeZone);
+ dst = default;
return true;
}
- DateTimeOffset res;
- if (DateTimeOffset.TryParse(src.ToString(), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out res))
- {
- dst = new DZ(res);
+
+ if (DateTimeOffset.TryParse(src.ToString(), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out dst))
return true;
- }
- dst = DvDateTimeZone.NA;
- return IsStdMissing(ref src);
+
+ var span = src.Span;
+ Contracts.Check(!IsStdMissing(ref span), "Missing values cannot be converted to boolean value.");
+ return true;
}
- // These map unparsable and overflow values to "NA", which is the value Ix.MinValue. Note that this NA
- // value is the "evil" value - the non-zero value, x, such that x == -x. Note also, that for I4, this
- // matches R's representation of NA.
+ // These throw an exception for unparsable and overflow values.
private I1 ParseI1(ref TX src)
{
- long res;
- bool f = TryParseSigned(RawI1.MaxValue, ref src, out res);
- Contracts.Assert(f || res == I1.RawNA);
- Contracts.Assert((RawI1)res == res);
- return (RawI1)res;
+ TryParseSigned(I1.MaxValue, ref src, out long? res);
+ Contracts.Check(res.HasValue, "Value could not be parsed from text to sbyte.");
+ Contracts.Check((I1)res == res, "Overflow or underflow occured while converting value in text to sbyte.");
+ return (I1)res;
}
private I2 ParseI2(ref TX src)
{
- long res;
- bool f = TryParseSigned(RawI2.MaxValue, ref src, out res);
- Contracts.Assert(f || res == I2.RawNA);
- Contracts.Assert((RawI2)res == res);
- return (RawI2)res;
+ TryParseSigned(I2.MaxValue, ref src, out long? res);
+ Contracts.Check(res.HasValue, "Value could not be parsed from text to short.");
+ Contracts.Check((I2)res == res, "Overflow or underflow occured while converting value in text to short.");
+ return (I2)res;
}
private I4 ParseI4(ref TX src)
{
- long res;
- bool f = TryParseSigned(RawI4.MaxValue, ref src, out res);
- Contracts.Assert(f || res == I4.RawNA);
- Contracts.Assert((RawI4)res == res);
- return (RawI4)res;
+ TryParseSigned(I4.MaxValue, ref src, out long? res);
+ Contracts.Check(res.HasValue, "Value could not be parsed from text to int.");
+ Contracts.Check((I4)res == res, "Overflow or underflow occured while converting value in text to int.");
+ return (I4)res;
}
private I8 ParseI8(ref TX src)
{
- long res;
- bool f = TryParseSigned(RawI8.MaxValue, ref src, out res);
- Contracts.Assert(f || res == I8.RawNA);
- return res;
+ TryParseSigned(I8.MaxValue, ref src, out long? res);
+ Contracts.Check(res.HasValue, "Value could not be parsed from text to long.");
+ return res.Value;
}
// These map unparsable and overflow values to zero. The unsigned integer types do not have an NA value.
@@ -1618,116 +1528,113 @@ private U8 ParseU8(ref TX span)
///
public bool TryParse(ref TX src, out BL dst)
{
- // NA text fails.
- if (src.IsNA)
- {
- dst = BL.NA;
- return true;
- }
+ var span = src.Span;
+
+ Contracts.Check(!IsStdMissing(ref span), "Missing text values cannot be converted to bool value.");
char ch;
switch (src.Length)
{
- case 0:
- // Empty succeeds and maps to false.
- dst = BL.False;
- return true;
-
- case 1:
- switch (src[0])
- {
- case 'T':
- case 't':
- case 'Y':
- case 'y':
- case '1':
- case '+':
- dst = BL.True;
- return true;
- case 'F':
- case 'f':
- case 'N':
- case 'n':
- case '0':
- case '-':
- dst = BL.False;
+ case 0:
+ // Empty succeeds and maps to false.
+ dst = false;
return true;
- }
- break;
- case 2:
- switch (src[0])
- {
- case 'N':
- case 'n':
- if ((ch = src[1]) != 'O' && ch != 'o')
- break;
- dst = BL.False;
- return true;
- case '+':
- if ((ch = src[1]) != '1')
- break;
- dst = BL.True;
- return true;
- case '-':
- if ((ch = src[1]) != '1')
- break;
- dst = BL.False;
- return true;
- }
- break;
+ case 1:
+ switch (span[0])
+ {
+ case 'T':
+ case 't':
+ case 'Y':
+ case 'y':
+ case '1':
+ case '+':
+ dst = true;
+ return true;
+ case 'F':
+ case 'f':
+ case 'N':
+ case 'n':
+ case '0':
+ case '-':
+ dst = false;
+ return true;
+ }
+ break;
- case 3:
- switch (src[0])
- {
- case 'Y':
- case 'y':
- if ((ch = src[1]) != 'E' && ch != 'e')
- break;
- if ((ch = src[2]) != 'S' && ch != 's')
- break;
- dst = BL.True;
- return true;
- }
- break;
+ case 2:
+ switch (span[0])
+ {
+ case 'N':
+ case 'n':
+ if ((ch = span[1]) != 'O' && ch != 'o')
+ break;
+ dst = false;
+ return true;
+ case '+':
+ if ((ch = span[1]) != '1')
+ break;
+ dst = true;
+ return true;
+ case '-':
+ if ((ch = span[1]) != '1')
+ break;
+ dst = false;
+ return true;
+ }
+ break;
- case 4:
- switch (src[0])
- {
- case 'T':
- case 't':
- if ((ch = src[1]) != 'R' && ch != 'r')
- break;
- if ((ch = src[2]) != 'U' && ch != 'u')
- break;
- if ((ch = src[3]) != 'E' && ch != 'e')
- break;
- dst = BL.True;
- return true;
- }
- break;
+ case 3:
+ switch (span[0])
+ {
+ case 'Y':
+ case 'y':
+ if ((ch = span[1]) != 'E' && ch != 'e')
+ break;
+ if ((ch = span[2]) != 'S' && ch != 's')
+ break;
+ dst = true;
+ return true;
+ }
+ break;
- case 5:
- switch (src[0])
- {
- case 'F':
- case 'f':
- if ((ch = src[1]) != 'A' && ch != 'a')
- break;
- if ((ch = src[2]) != 'L' && ch != 'l')
- break;
- if ((ch = src[3]) != 'S' && ch != 's')
- break;
- if ((ch = src[4]) != 'E' && ch != 'e')
- break;
- dst = BL.False;
- return true;
- }
- break;
+ case 4:
+ switch (span[0])
+ {
+ case 'T':
+ case 't':
+ if ((ch = span[1]) != 'R' && ch != 'r')
+ break;
+ if ((ch = span[2]) != 'U' && ch != 'u')
+ break;
+ if ((ch = span[3]) != 'E' && ch != 'e')
+ break;
+ dst = true;
+ return true;
+ }
+ break;
+
+ case 5:
+ switch (span[0])
+ {
+ case 'F':
+ case 'f':
+ if ((ch = span[1]) != 'A' && ch != 'a')
+ break;
+ if ((ch = span[2]) != 'L' && ch != 'l')
+ break;
+ if ((ch = span[3]) != 'S' && ch != 's')
+ break;
+ if ((ch = span[4]) != 'E' && ch != 'e')
+ break;
+ dst = false;
+ return true;
+ }
+ break;
}
- dst = BL.NA;
- return IsStdMissing(ref src);
+ dst = false;
+ return false;
}
private bool TryParse(ref TX src, out TX dst)
@@ -1773,16 +1680,18 @@ public void Convert(ref TX span, ref UG value)
if (!TryParse(ref span, out value))
Contracts.Assert(value.Equals(default(UG)));
}
- public void Convert(ref TX span, ref R4 value)
+ public void Convert(ref TX src, ref R4 value)
{
- if (span.TryParse(out value))
+ var span = src.Span;
+ if (DoubleParser.TryParse(span, out value))
return;
// Unparsable is mapped to NA.
value = R4.NaN;
}
- public void Convert(ref TX span, ref R8 value)
+ public void Convert(ref TX src, ref R8 value)
{
- if (span.TryParse(out value))
+ var span = src.Span;
+ if (DoubleParser.TryParse(span, out value))
return;
// Unparsable is mapped to NA.
value = R8.NaN;
@@ -1791,43 +1700,32 @@ public void Convert(ref TX span, ref TX value)
{
value = span;
}
- public void Convert(ref TX span, ref BL value)
+ public void Convert(ref TX src, ref BL value)
{
- // When TryParseBL returns false, it should have set value to NA.
- if (!TryParse(ref span, out value))
- Contracts.Assert(value.IsNA);
+ // When TryParseBL returns false, it should have set value to false.
+ if (!TryParse(ref src, out value))
+ Contracts.Assert(!value);
}
public void Convert(ref TX src, ref SB dst)
{
ClearDst(ref dst);
- if (src.HasChars)
- src.AddToStringBuilder(dst);
+ if (!src.IsEmpty)
+ dst.AppendMemory(src);
}
- public void Convert(ref TX span, ref TS value)
- {
- if (!TryParse(ref span, out value))
- Contracts.Assert(value.IsNA);
- }
- public void Convert(ref TX span, ref DT value)
- {
- if (!TryParse(ref span, out value))
- Contracts.Assert(value.IsNA);
- }
- public void Convert(ref TX span, ref DZ value)
- {
- if (!TryParse(ref span, out value))
- Contracts.Assert(value.IsNA);
- }
+ public void Convert(ref TX span, ref TS value) => TryParse(ref span, out value);
+ public void Convert(ref TX span, ref DT value) => TryParse(ref span, out value);
+ public void Convert(ref TX span, ref DZ value) => TryParse(ref span, out value);
+
#endregion FromTX
#region FromBL
- public void Convert(ref BL src, ref I1 dst) => dst = (I1)src;
- public void Convert(ref BL src, ref I2 dst) => dst = (I2)src;
- public void Convert(ref BL src, ref I4 dst) => dst = (I4)src;
- public void Convert(ref BL src, ref I8 dst) => dst = (I8)src;
- public void Convert(ref BL src, ref R4 dst) => dst = (R4)src;
- public void Convert(ref BL src, ref R8 dst) => dst = (R8)src;
+ public void Convert(ref BL src, ref I1 dst) => dst = (I1)(object)src;
+ public void Convert(ref BL src, ref I2 dst) => dst = (I2)(object)src;
+ public void Convert(ref BL src, ref I4 dst) => dst = (I4)(object)src;
+ public void Convert(ref BL src, ref I8 dst) => dst = (I8)(object)src;
+ public void Convert(ref BL src, ref R4 dst) => dst = System.Convert.ToSingle(src);
+ public void Convert(ref BL src, ref R8 dst) => dst = System.Convert.ToDouble(src);
public void Convert(ref BL src, ref BL dst) => dst = src;
#endregion FromBL
}
diff --git a/src/Microsoft.ML.Data/Data/DataViewUtils.cs b/src/Microsoft.ML.Data/Data/DataViewUtils.cs
index 1db4d5ad0a..17307186fd 100644
--- a/src/Microsoft.ML.Data/Data/DataViewUtils.cs
+++ b/src/Microsoft.ML.Data/Data/DataViewUtils.cs
@@ -1312,14 +1312,14 @@ public ValueGetter GetGetter(int col)
}
}
- public static ValueGetter[] PopulateGetterArray(IRowCursor cursor, List colIndices)
+ public static ValueGetter>[] PopulateGetterArray(IRowCursor cursor, List colIndices)
{
var n = colIndices.Count;
- var getters = new ValueGetter[n];
+ var getters = new ValueGetter>[n];
for (int i = 0; i < n; i++)
{
- ValueGetter getter;
+ ValueGetter> getter;
var srcColIndex = colIndices[i];
var colType = cursor.Schema.GetColumnType(srcColIndex);
@@ -1340,7 +1340,7 @@ public static ValueGetter[] PopulateGetterArray(IRowCursor cursor, List<
return getters;
}
- public static ValueGetter GetSingleValueGetter(IRow cursor, int i, ColumnType colType)
+ public static ValueGetter> GetSingleValueGetter(IRow cursor, int i, ColumnType colType)
{
var floatGetter = cursor.GetGetter(i);
T v = default(T);
@@ -1359,18 +1359,18 @@ public static ValueGetter GetSingleValueGetter(IRow cursor, int i, Co
}
StringBuilder dst = null;
- ValueGetter getter =
- (ref DvText value) =>
+ ValueGetter> getter =
+ (ref ReadOnlyMemory value) =>
{
floatGetter(ref v);
conversion(ref v, ref dst);
string text = dst.ToString();
- value = new DvText(text);
+ value = text.AsMemory();
};
return getter;
}
- public static ValueGetter GetVectorFlatteningGetter(IRow cursor, int colIndex, ColumnType colType)
+ public static ValueGetter> GetVectorFlatteningGetter(IRow cursor, int colIndex, ColumnType colType)
{
var vecGetter = cursor.GetGetter>(colIndex);
var vbuf = default(VBuffer);
@@ -1378,8 +1378,8 @@ public static ValueGetter GetVectorFlatteningGetter(IRow cursor, int
ValueMapper conversion;
Conversions.Instance.TryGetStringConversion(colType, out conversion);
StringBuilder dst = null;
- ValueGetter getter =
- (ref DvText value) =>
+ ValueGetter> getter =
+ (ref ReadOnlyMemory value) =>
{
vecGetter(ref vbuf);
@@ -1393,7 +1393,7 @@ public static ValueGetter GetVectorFlatteningGetter(IRow cursor, int
conversion(ref v, ref dst);
return dst.ToString();
}));
- value = new DvText(string.Format("<{0}{1}>", stringRep, suffix));
+ value = string.Format("<{0}{1}>", stringRep, suffix).AsMemory();
};
return getter;
}
diff --git a/src/Microsoft.ML.Data/Data/RowCursorUtils.cs b/src/Microsoft.ML.Data/Data/RowCursorUtils.cs
index 091fe26cb2..9f09e0da51 100644
--- a/src/Microsoft.ML.Data/Data/RowCursorUtils.cs
+++ b/src/Microsoft.ML.Data/Data/RowCursorUtils.cs
@@ -394,16 +394,16 @@ private static ValueGetter GetLabelGetterNotFloat(IRow cursor, int label
Contracts.Assert(type != NumberType.R4 && type != NumberType.R8);
- // DvBool type label mapping: True -> 1, False -> 0, NA -> NaN.
+ // boolean type label mapping: True -> 1, False -> 0.
if (type.IsBool)
{
- var getBoolSrc = cursor.GetGetter(labelIndex);
+ var getBoolSrc = cursor.GetGetter(labelIndex);
return
(ref Single dst) =>
{
- DvBool src = DvBool.NA;
+ bool src = default;
getBoolSrc(ref src);
- dst = (Single)src;
+ dst = Convert.ToSingle(src);
};
}
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs
index 7bc0a8d2ad..582212738a 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs
@@ -729,7 +729,13 @@ public void GetMetadata(string kind, int col, ref TValue value)
///
/// Upper inclusive bound of versions this reader can read.
///
- private const ulong ReaderVersion = MissingTextVersion;
+ private const ulong ReaderVersion = StandardDataTypesVersion;
+
+ ///
+ /// The first version that removes DvTypes and uses .NET standard
+ /// data types.
+ ///
+ private const ulong StandardDataTypesVersion = 0x0001000100010006;
///
/// The first version of the format that accomodated DvText.NA.
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/CodecFactory.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/CodecFactory.cs
index d04adaf099..544a8c60f5 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Binary/CodecFactory.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/CodecFactory.cs
@@ -44,26 +44,28 @@ public CodecFactory(IHostEnvironment env, MemoryStreamPool memPool = null)
_loadNameToCodecCreator = new Dictionary();
_simpleCodecTypeMap = new Dictionary();
// Register the current codecs.
- RegisterSimpleCodec(new UnsafeTypeCodec(this));
+ RegisterSimpleCodec(new UnsafeTypeCodec(this));
RegisterSimpleCodec(new UnsafeTypeCodec(this));
- RegisterSimpleCodec(new UnsafeTypeCodec(this));
+ RegisterSimpleCodec(new UnsafeTypeCodec(this));
RegisterSimpleCodec(new UnsafeTypeCodec(this));
- RegisterSimpleCodec(new UnsafeTypeCodec(this));
+ RegisterSimpleCodec(new UnsafeTypeCodec(this));
RegisterSimpleCodec(new UnsafeTypeCodec(this));
- RegisterSimpleCodec(new UnsafeTypeCodec(this));
+ RegisterSimpleCodec(new UnsafeTypeCodec(this));
RegisterSimpleCodec(new UnsafeTypeCodec(this));
- RegisterSimpleCodec(new UnsafeTypeCodec(this));
- RegisterSimpleCodec(new UnsafeTypeCodec(this));
- RegisterSimpleCodec(new UnsafeTypeCodec(this));
- RegisterSimpleCodec(new DvTextCodec(this));
+ RegisterSimpleCodec(new UnsafeTypeCodec(this));
+ RegisterSimpleCodec(new UnsafeTypeCodec(this));
+ RegisterSimpleCodec(new UnsafeTypeCodec(this));
+ RegisterSimpleCodec(new TextCodec(this));
RegisterSimpleCodec(new BoolCodec(this));
RegisterSimpleCodec(new DateTimeCodec(this));
- RegisterSimpleCodec(new DateTimeZoneCodec(this));
+ RegisterSimpleCodec(new DateTimeOffsetCodec(this));
RegisterSimpleCodec(new UnsafeTypeCodec(this));
- // Register the old boolean reading codec.
- var oldBool = new OldBoolCodec(this);
- RegisterOtherCodec(oldBool.LoadName, oldBool.GetCodec);
+ // Register the load names used by the old Dv* type system so that older files can still be read.
+ RegisterOtherCodec("DvBool", new OldBoolCodec(this).GetCodec);
+ RegisterOtherCodec("DvDateTimeZone", new DateTimeOffsetCodec(this).GetCodec);
+ RegisterOtherCodec("DvDateTime", new DateTimeCodec(this).GetCodec);
+ RegisterOtherCodec("DvTimeSpan", new UnsafeTypeCodec(this).GetCodec);
RegisterOtherCodec("VBuffer", GetVBufferCodec);
RegisterOtherCodec("Key", GetKeyCodec);
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs
index f840773872..3e4f997431 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs
@@ -179,10 +179,10 @@ public override string LoadName
}
// Gatekeeper to ensure T is a type that is supported by UnsafeTypeCodec.
- // Throws an exception if T is neither a DvTimeSpan nor a NumberType.
+ // Throws an exception if T is neither a TimeSpan nor a NumberType.
private static ColumnType UnsafeColumnType(Type type)
{
- return type == typeof(DvTimeSpan) ? (ColumnType)TimeSpanType.Instance : NumberType.FromType(type);
+ return type == typeof(TimeSpan) ? (ColumnType)TimeSpanType.Instance : NumberType.FromType(type);
}
public UnsafeTypeCodec(CodecFactory factory)
@@ -305,9 +305,8 @@ public override void Read(T[] values, int index, int count)
}
}
- private sealed class DvTextCodec : SimpleCodec
+ private sealed class TextCodec : SimpleCodec>
{
- private const int MissingBit = unchecked((int)0x80000000);
private const int LengthMask = unchecked((int)0x7FFFFFFF);
public override string LoadName
@@ -320,43 +319,38 @@ public override string LoadName
// int[entries]: The non-decreasing end-boundary character index array, with high bit set for "missing" values.
// string: The UTF-8 encoded string, with the standard LEB128 byte-length preceeding it.
- public DvTextCodec(CodecFactory factory)
+ public TextCodec(CodecFactory factory)
: base(factory, TextType.Instance)
{
}
- public override IValueWriter OpenWriter(Stream stream)
+ public override IValueWriter> OpenWriter(Stream stream)
{
return new Writer(this, stream);
}
- public override IValueReader OpenReader(Stream stream, int items)
+ public override IValueReader> OpenReader(Stream stream, int items)
{
return new Reader(this, stream, items);
}
- private sealed class Writer : ValueWriterBase
+ private sealed class Writer : ValueWriterBase>
{
private StringBuilder _builder;
private List _boundaries;
- public Writer(DvTextCodec codec, Stream stream)
+ public Writer(TextCodec codec, Stream stream)
: base(codec.Factory, stream)
{
_builder = new StringBuilder();
_boundaries = new List();
}
- public override void Write(ref DvText value)
+ public override void Write(ref ReadOnlyMemory value)
{
Contracts.Check(_builder != null, "writer was already committed");
- if (value.IsNA)
- _boundaries.Add(_builder.Length | MissingBit);
- else
- {
- value.AddToStringBuilder(_builder);
- _boundaries.Add(_builder.Length);
- }
+ _builder.AppendMemory(value);
+ _boundaries.Add(_builder.Length);
}
public override void Commit()
@@ -378,14 +372,14 @@ public override long GetCommitLengthEstimate()
}
}
- private sealed class Reader : ValueReaderBase
+ private sealed class Reader : ValueReaderBase>
{
private readonly int _entries;
private readonly int[] _boundaries;
private int _index;
private string _text;
- public Reader(DvTextCodec codec, Stream stream, int items)
+ public Reader(TextCodec codec, Stream stream, int items)
: base(codec.Factory, stream)
{
_entries = Reader.ReadInt32();
@@ -408,29 +402,34 @@ public override void MoveNext()
Contracts.Check(++_index < _entries, "reader already read all values");
}
- public override void Get(ref DvText value)
+ public override void Get(ref ReadOnlyMemory value)
{
Contracts.Assert(_index < _entries);
int b = _boundaries[_index + 1];
- if (b < 0)
- value = DvText.NA;
+ int start = _boundaries[_index] & LengthMask;
+ if (b >= 0)
+ value = _text.AsMemory().Slice(start, (b & LengthMask) - start);
else
- value = new DvText(_text, _boundaries[_index] & LengthMask, b & LengthMask);
+ {
+ //For backward compatibility when NA values existed, treat them
+ //as an empty string.
+ value = ReadOnlyMemory.Empty;
+ }
}
}
}
///
- /// This is an older boolean code that reads from a form that serialized
- /// 1 bit per value. The new encoding (implemented by a different codec)
+ /// This is a boolean codec that reads and writes a form that serializes
+ /// 1 bit per value. The old encoding (implemented by a different codec)
/// uses 2 bits per value so NA values can be supported.
///
- private sealed class OldBoolCodec : SimpleCodec
+ private sealed class BoolCodec : SimpleCodec
{
// *** Binary block format ***
// Packed bits.
- public OldBoolCodec(CodecFactory factory)
+ public BoolCodec(CodecFactory factory)
: base(factory, BoolType.Instance)
{
}
@@ -440,24 +439,70 @@ public override string LoadName
get { return typeof(bool).Name; }
}
- public override IValueWriter OpenWriter(Stream stream)
+ public override IValueWriter OpenWriter(Stream stream)
{
- Contracts.Assert(false, "This older form only supports reading");
- throw Contracts.ExceptNotSupp("Writing single bit booleans no longer supported");
+ return new Writer(this, stream);
+ }
+
+ private sealed class Writer : ValueWriterBase
+ {
+ // Pack 8 values into 8 bits.
+ private byte _currentBits;
+ private long _numWritten;
+ private byte _currentIndex;
+
+ public Writer(BoolCodec codec, Stream stream)
+ : base(codec.Factory, stream)
+ {
+ }
+
+ public override void Write(ref bool value)
+ {
+ Contracts.Assert(0 <= _currentIndex && _currentIndex < 8);
+
+ _numWritten++;
+ if (value)
+ _currentBits |= (byte)(1 << _currentIndex);
+
+ _currentIndex++;
+ if (_currentIndex == 8)
+ {
+ Writer.Write(_currentBits);
+ _currentBits = 0;
+ _currentIndex = 0;
+ }
+ }
+
+ // REVIEW: More efficient array writers are certainly possible.
+
+ public override long GetCommitLengthEstimate()
+ {
+ return 4 * (((_numWritten - 1) >> 4) + 1);
+ }
+
+ public override void Commit()
+ {
+ if (_currentIndex > 0)
+ {
+ Writer.Write(_currentBits);
+ _currentBits = 0;
+ _currentIndex = 0;
+ }
+ }
}
- public override IValueReader OpenReader(Stream stream, int items)
+ public override IValueReader OpenReader(Stream stream, int items)
{
return new Reader(this, stream, items);
}
- private sealed class Reader : ValueReaderBase
+ private sealed class Reader : ValueReaderBase
{
private byte _currentBits;
private int _currentIndex;
private int _remaining;
- public Reader(OldBoolCodec codec, Stream stream, int items)
+ public Reader(BoolCodec codec, Stream stream, int items)
: base(codec.Factory, stream)
{
_remaining = items;
@@ -474,7 +519,7 @@ public override void MoveNext()
_currentBits >>= 1;
}
- public override void Get(ref DvBool value)
+ public override void Get(ref bool value)
{
Contracts.Assert(0 <= _currentIndex, "have not moved in");
Contracts.Assert(_currentIndex < 8);
@@ -483,83 +528,34 @@ public override void Get(ref DvBool value)
}
}
- private sealed class BoolCodec : SimpleCodec
+ private sealed class OldBoolCodec : SimpleCodec
{
// *** Binary block format ***
// Pack 16 values into 32 bits, with 00 for false, 01 for true and 10 for NA.
- public BoolCodec(CodecFactory factory)
+ public OldBoolCodec(CodecFactory factory)
: base(factory, BoolType.Instance)
{
}
- public override IValueWriter OpenWriter(Stream stream)
+ public override IValueWriter OpenWriter(Stream stream)
{
- return new Writer(this, stream);
+ Contracts.Assert(false, "This older form only supports reading");
+ throw Contracts.ExceptNotSupp("Writing single bit booleans no longer supported");
}
- public override IValueReader OpenReader(Stream stream, int items)
+ public override IValueReader OpenReader(Stream stream, int items)
{
return new Reader(this, stream, items);
}
- private sealed class Writer : ValueWriterBase
- {
- // Pack 16 values into 32 bits.
- private int _currentBits;
- private long _numWritten;
- private int _currentIndex;
-
- public Writer(BoolCodec codec, Stream stream)
- : base(codec.Factory, stream)
- {
- }
-
- public override void Write(ref DvBool value)
- {
- Contracts.Assert(0 <= _currentIndex && _currentIndex < 32);
- Contracts.Assert((_currentIndex & 1) == 0);
-
- _numWritten++;
- if (value.IsTrue)
- _currentBits |= 1 << _currentIndex;
- else if (!value.IsFalse)
- _currentBits |= 2 << _currentIndex;
-
- _currentIndex += 2;
- if (_currentIndex == 32)
- {
- Writer.Write(_currentBits);
- _currentBits = 0;
- _currentIndex = 0;
- }
- }
-
- // REVIEW: More efficient array writers are certainly possible.
-
- public override long GetCommitLengthEstimate()
- {
- return 4 * (((_numWritten - 1) >> 4) + 1);
- }
-
- public override void Commit()
- {
- if (_currentIndex > 0)
- {
- Writer.Write(_currentBits);
- _currentBits = 0;
- _currentIndex = 0;
- }
- }
- }
-
- private sealed class Reader : ValueReaderBase
+ private sealed class Reader : ValueReaderBase
{
private int _currentBits;
private int _currentSlot;
private int _remaining;
- public Reader(BoolCodec codec, Stream stream, int items)
+ public Reader(OldBoolCodec codec, Stream stream, int items)
: base(codec.Factory, stream)
{
_remaining = items;
@@ -576,20 +572,20 @@ public override void MoveNext()
_currentBits = (int)((uint)_currentBits >> 2);
}
- public override void Get(ref DvBool value)
+ public override void Get(ref bool value)
{
Contracts.Assert(0 <= _currentSlot, "have not moved in");
Contracts.Assert(_currentSlot < 16);
switch (_currentBits & 0x3)
{
case 0x0:
- value = DvBool.False;
+ value = false;
break;
case 0x1:
- value = DvBool.True;
+ value = true;
break;
case 0x2:
- value = DvBool.NA;
+ value = false;
break;
default:
throw Contracts.ExceptDecode("Invalid bit pattern in BoolCodec");
@@ -598,24 +594,24 @@ public override void Get(ref DvBool value)
}
}
- private sealed class DateTimeCodec : SimpleCodec
+ private sealed class DateTimeCodec : SimpleCodec
{
public DateTimeCodec(CodecFactory factory)
: base(factory, DateTimeType.Instance)
{
}
- public override IValueWriter OpenWriter(Stream stream)
+ public override IValueWriter OpenWriter(Stream stream)
{
return new Writer(this, stream);
}
- public override IValueReader OpenReader(Stream stream, int items)
+ public override IValueReader OpenReader(Stream stream, int items)
{
return new Reader(this, stream, items);
}
- private sealed class Writer : ValueWriterBase
+ private sealed class Writer : ValueWriterBase
{
private long _numWritten;
@@ -624,11 +620,9 @@ public Writer(DateTimeCodec codec, Stream stream)
{
}
- public override void Write(ref DvDateTime value)
+ public override void Write(ref DateTime value)
{
- var ticks = value.Ticks.RawValue;
- Contracts.Assert(ticks == DvInt8.RawNA || (ulong)ticks <= DvDateTime.MaxTicks);
- Writer.Write(ticks);
+ Writer.Write(value.Ticks);
_numWritten++;
}
@@ -639,14 +633,14 @@ public override void Commit()
public override long GetCommitLengthEstimate()
{
- return _numWritten * sizeof(Int64);
+ return _numWritten * sizeof(long);
}
}
- private sealed class Reader : ValueReaderBase
+ private sealed class Reader : ValueReaderBase
{
private int _remaining;
- private DvDateTime _value;
+ private DateTime _value;
public Reader(DateTimeCodec codec, Stream stream, int items)
: base(codec.Factory, stream)
@@ -657,74 +651,64 @@ public Reader(DateTimeCodec codec, Stream stream, int items)
public override void MoveNext()
{
Contracts.Assert(_remaining > 0, "already consumed all values");
- var value = Reader.ReadInt64();
- Contracts.CheckDecode(value == DvInt8.RawNA || (ulong)value <= DvDateTime.MaxTicks);
- _value = new DvDateTime(value);
+
+ var ticks = Reader.ReadInt64();
+ _value = new DateTime(ticks == long.MinValue ? default : ticks);
_remaining--;
}
- public override void Get(ref DvDateTime value)
+ public override void Get(ref DateTime value)
{
value = _value;
}
}
}
- private sealed class DateTimeZoneCodec : SimpleCodec
+ private sealed class DateTimeOffsetCodec : SimpleCodec
{
- private readonly MadeObjectPool _shortBufferPool;
private readonly MadeObjectPool _longBufferPool;
+ private readonly MadeObjectPool _shortBufferPool;
- public DateTimeZoneCodec(CodecFactory factory)
- : base(factory, DateTimeZoneType.Instance)
+ public DateTimeOffsetCodec(CodecFactory factory)
+ : base(factory, DateTimeOffsetType.Instance)
{
- _shortBufferPool = new MadeObjectPool(() => null);
_longBufferPool = new MadeObjectPool(() => null);
+ _shortBufferPool = new MadeObjectPool(() => null);
}
- public override IValueWriter OpenWriter(Stream stream)
+ public override IValueWriter OpenWriter(Stream stream)
{
return new Writer(this, stream);
}
- public override IValueReader OpenReader(Stream stream, int items)
+ public override IValueReader OpenReader(Stream stream, int items)
{
return new Reader(this, stream, items);
}
- private sealed class Writer : ValueWriterBase
+ private sealed class Writer : ValueWriterBase
{
private List _offsets;
private List _ticks;
- public Writer(DateTimeZoneCodec codec, Stream stream)
+ public Writer(DateTimeOffsetCodec codec, Stream stream)
: base(codec.Factory, stream)
{
_offsets = new List();
_ticks = new List();
}
- public override void Write(ref DvDateTimeZone value)
+ public override void Write(ref DateTimeOffset value)
{
Contracts.Assert(_offsets != null, "writer was already committed");
- var ticks = value.ClockDateTime.Ticks;
- var offset = value.OffsetMinutes;
+ _ticks.Add(value.DateTime.Ticks);
- _ticks.Add(ticks.RawValue);
- if (ticks.IsNA)
- {
- Contracts.Assert(offset.IsNA);
- _offsets.Add(0);
- }
- else
- {
- Contracts.Assert(
- offset.RawValue >= DvDateTimeZone.MinMinutesOffset &&
- offset.RawValue <= DvDateTimeZone.MaxMinutesOffset);
- Contracts.Assert(0 <= ticks.RawValue && ticks.RawValue <= DvDateTime.MaxTicks);
- _offsets.Add(offset.RawValue);
- }
+ //DateTimeOffset exposes its offset as a TimeSpan, but internally it stores it as a short, in minutes.
+ //https://github.com/dotnet/coreclr/blob/9499b08eefd895158c3f3c7834e185a73619128d/src/System.Private.CoreLib/shared/System/DateTimeOffset.cs#L51-L53
+ //https://github.com/dotnet/coreclr/blob/9499b08eefd895158c3f3c7834e185a73619128d/src/System.Private.CoreLib/shared/System/DateTimeOffset.cs#L286-L292
+ //From everything online (ISO8601, RFC3339, SQL Server doc), the offset supports the range -14 to 14 hours, and only supports minute precision.
+ _offsets.Add((short)(value.Offset.TotalMinutes));
}
public override void Commit()
@@ -740,13 +724,13 @@ public override void Commit()
public override long GetCommitLengthEstimate()
{
- return (long)_offsets.Count * (sizeof(Int64) + sizeof(Int16));
+ return (long)_offsets.Count * (sizeof(long) + sizeof(short));
}
}
- private sealed class Reader : ValueReaderBase
+ private sealed class Reader : ValueReaderBase
{
- private readonly DateTimeZoneCodec _codec;
+ private readonly DateTimeOffsetCodec _codec;
private readonly int _entries;
private short[] _offsets;
@@ -754,7 +738,7 @@ private sealed class Reader : ValueReaderBase
private int _index;
private bool _disposed;
- public Reader(DateTimeZoneCodec codec, Stream stream, int items)
+ public Reader(DateTimeOffsetCodec codec, Stream stream, int items)
: base(codec.Factory, stream)
{
_codec = codec;
@@ -764,17 +748,12 @@ public Reader(DateTimeZoneCodec codec, Stream stream, int items)
_offsets = _codec._shortBufferPool.Get();
Utils.EnsureSize(ref _offsets, _entries, false);
for (int i = 0; i < _entries; i++)
- {
_offsets[i] = Reader.ReadInt16();
- Contracts.CheckDecode(DvDateTimeZone.MinMinutesOffset <= _offsets[i] && _offsets[i] <= DvDateTimeZone.MaxMinutesOffset);
- }
+
_ticks = _codec._longBufferPool.Get();
Utils.EnsureSize(ref _ticks, _entries, false);
for (int i = 0; i < _entries; i++)
- {
_ticks[i] = Reader.ReadInt64();
- Contracts.CheckDecode(_ticks[i] == DvInt8.RawNA || (ulong)_ticks[i] <= DvDateTime.MaxTicks);
- }
}
public override void MoveNext()
@@ -783,10 +762,12 @@ public override void MoveNext()
Contracts.Check(++_index < _entries, "reader already read all values");
}
- public override void Get(ref DvDateTimeZone value)
+ public override void Get(ref DateTimeOffset value)
{
Contracts.Assert(!_disposed);
- value = new DvDateTimeZone(_ticks[_index], _offsets[_index]);
+ var ticks = _ticks[_index];
+ var offset = _offsets[_index];
+ value = new DateTimeOffset(new DateTime(ticks == long.MinValue ? default : ticks), new TimeSpan(0, offset == short.MinValue ? default : offset, 0));
}
public override void Dispose()
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Header.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Header.cs
index 36186cf7af..b552ab6523 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Header.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/Header.cs
@@ -34,8 +34,9 @@ public struct Header
//public const ulong WriterVersion = 0x0001000100010002; // Codec changes.
//public const ulong WriterVersion = 0x0001000100010003; // Slot names.
//public const ulong WriterVersion = 0x0001000100010004; // Column metadata.
- public const ulong WriterVersion = 0x0001000100010005; // "NA" DvText support.
- public const ulong CanBeReadByVersion = 0x0001000100010005;
+ //public const ulong WriterVersion = 0x0001000100010005; // "NA" DvText support.
+ public const ulong WriterVersion = 0x0001000100010006; // Replace DvTypes with .NET Standard data types.
+ public const ulong CanBeReadByVersion = 0x0001000100010006;
internal static string VersionToString(ulong v)
{
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/UnsafeTypeOps.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/UnsafeTypeOps.cs
index 026228d6be..b63f361fe2 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Binary/UnsafeTypeOps.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/UnsafeTypeOps.cs
@@ -32,21 +32,17 @@ internal static class UnsafeTypeOpsFactory
static UnsafeTypeOpsFactory()
{
_type2ops = new Dictionary();
- _type2ops[typeof(SByte)] = new SByteUnsafeTypeOps();
- _type2ops[typeof(DvInt1)] = new DvI1UnsafeTypeOps();
+ _type2ops[typeof(sbyte)] = new SByteUnsafeTypeOps();
_type2ops[typeof(Byte)] = new ByteUnsafeTypeOps();
- _type2ops[typeof(Int16)] = new Int16UnsafeTypeOps();
- _type2ops[typeof(DvInt2)] = new DvI2UnsafeTypeOps();
+ _type2ops[typeof(short)] = new Int16UnsafeTypeOps();
_type2ops[typeof(UInt16)] = new UInt16UnsafeTypeOps();
- _type2ops[typeof(Int32)] = new Int32UnsafeTypeOps();
- _type2ops[typeof(DvInt4)] = new DvI4UnsafeTypeOps();
+ _type2ops[typeof(int)] = new Int32UnsafeTypeOps();
_type2ops[typeof(UInt32)] = new UInt32UnsafeTypeOps();
- _type2ops[typeof(Int64)] = new Int64UnsafeTypeOps();
- _type2ops[typeof(DvInt8)] = new DvI8UnsafeTypeOps();
+ _type2ops[typeof(long)] = new Int64UnsafeTypeOps();
_type2ops[typeof(UInt64)] = new UInt64UnsafeTypeOps();
_type2ops[typeof(Single)] = new SingleUnsafeTypeOps();
_type2ops[typeof(Double)] = new DoubleUnsafeTypeOps();
- _type2ops[typeof(DvTimeSpan)] = new DvTimeSpanUnsafeTypeOps();
+ _type2ops[typeof(TimeSpan)] = new TimeSpanUnsafeTypeOps();
_type2ops[typeof(UInt128)] = new UgUnsafeTypeOps();
}
@@ -55,29 +51,16 @@ public static UnsafeTypeOps Get()
return (UnsafeTypeOps)_type2ops[typeof(T)];
}
- private sealed class SByteUnsafeTypeOps : UnsafeTypeOps
+ private sealed class SByteUnsafeTypeOps : UnsafeTypeOps
{
- public override int Size { get { return sizeof(SByte); } }
- public override unsafe void Apply(SByte[] array, Action func)
+ public override int Size { get { return sizeof(sbyte); } }
+ public override unsafe void Apply(sbyte[] array, Action func)
{
- fixed (SByte* pArray = array)
+ fixed (sbyte* pArray = array)
func(new IntPtr(pArray));
}
- public override void Write(SByte a, BinaryWriter writer) { writer.Write(a); }
- public override SByte Read(BinaryReader reader) { return reader.ReadSByte(); }
- }
-
- private sealed class DvI1UnsafeTypeOps : UnsafeTypeOps
- {
- public override int Size { get { return sizeof(SByte); } }
- public override unsafe void Apply(DvInt1[] array, Action func)
- {
- fixed (DvInt1* pArray = array)
- func(new IntPtr(pArray));
- }
-
- public override void Write(DvInt1 a, BinaryWriter writer) { writer.Write(a.RawValue); }
- public override DvInt1 Read(BinaryReader reader) { return reader.ReadSByte(); }
+ public override void Write(sbyte a, BinaryWriter writer) { writer.Write(a); }
+ public override sbyte Read(BinaryReader reader) { return reader.ReadSByte(); }
}
private sealed class ByteUnsafeTypeOps : UnsafeTypeOps
@@ -92,29 +75,16 @@ public override unsafe void Apply(Byte[] array, Action func)
public override Byte Read(BinaryReader reader) { return reader.ReadByte(); }
}
- private sealed class Int16UnsafeTypeOps : UnsafeTypeOps
+ private sealed class Int16UnsafeTypeOps : UnsafeTypeOps
{
- public override int Size { get { return sizeof(Int16); } }
- public override unsafe void Apply(Int16[] array, Action func)
+ public override int Size { get { return sizeof(short); } }
+ public override unsafe void Apply(short[] array, Action func)
{
- fixed (Int16* pArray = array)
+ fixed (short* pArray = array)
func(new IntPtr(pArray));
}
- public override void Write(Int16 a, BinaryWriter writer) { writer.Write(a); }
- public override Int16 Read(BinaryReader reader) { return reader.ReadInt16(); }
- }
-
- private sealed class DvI2UnsafeTypeOps : UnsafeTypeOps
- {
- public override int Size { get { return sizeof(Int16); } }
- public override unsafe void Apply(DvInt2[] array, Action func)
- {
- fixed (DvInt2* pArray = array)
- func(new IntPtr(pArray));
- }
-
- public override void Write(DvInt2 a, BinaryWriter writer) { writer.Write(a.RawValue); }
- public override DvInt2 Read(BinaryReader reader) { return reader.ReadInt16(); }
+ public override void Write(short a, BinaryWriter writer) { writer.Write(a); }
+ public override short Read(BinaryReader reader) { return reader.ReadInt16(); }
}
private sealed class UInt16UnsafeTypeOps : UnsafeTypeOps
@@ -129,29 +99,16 @@ public override unsafe void Apply(UInt16[] array, Action func)
public override UInt16 Read(BinaryReader reader) { return reader.ReadUInt16(); }
}
- private sealed class Int32UnsafeTypeOps : UnsafeTypeOps
- {
- public override int Size { get { return sizeof(Int32); } }
- public override unsafe void Apply(Int32[] array, Action func)
- {
- fixed (Int32* pArray = array)
- func(new IntPtr(pArray));
- }
- public override void Write(Int32 a, BinaryWriter writer) { writer.Write(a); }
- public override Int32 Read(BinaryReader reader) { return reader.ReadInt32(); }
- }
-
- private sealed class DvI4UnsafeTypeOps : UnsafeTypeOps
+ private sealed class Int32UnsafeTypeOps : UnsafeTypeOps
{
- public override int Size { get { return sizeof(Int32); } }
- public override unsafe void Apply(DvInt4[] array, Action func)
+ public override int Size { get { return sizeof(int); } }
+ public override unsafe void Apply(int[] array, Action func)
{
- fixed (DvInt4* pArray = array)
+ fixed (int* pArray = array)
func(new IntPtr(pArray));
}
-
- public override void Write(DvInt4 a, BinaryWriter writer) { writer.Write(a.RawValue); }
- public override DvInt4 Read(BinaryReader reader) { return reader.ReadInt32(); }
+ public override void Write(int a, BinaryWriter writer) { writer.Write(a); }
+ public override int Read(BinaryReader reader) { return reader.ReadInt32(); }
}
private sealed class UInt32UnsafeTypeOps : UnsafeTypeOps
@@ -166,29 +123,16 @@ public override unsafe void Apply(UInt32[] array, Action func)
public override UInt32 Read(BinaryReader reader) { return reader.ReadUInt32(); }
}
- private sealed class Int64UnsafeTypeOps : UnsafeTypeOps
+ private sealed class Int64UnsafeTypeOps : UnsafeTypeOps
{
- public override int Size { get { return sizeof(Int64); } }
- public override unsafe void Apply(Int64[] array, Action func)
+ public override int Size { get { return sizeof(long); } }
+ public override unsafe void Apply(long[] array, Action func)
{
- fixed (Int64* pArray = array)
+ fixed (long* pArray = array)
func(new IntPtr(pArray));
}
- public override void Write(Int64 a, BinaryWriter writer) { writer.Write(a); }
- public override Int64 Read(BinaryReader reader) { return reader.ReadInt64(); }
- }
-
- private sealed class DvI8UnsafeTypeOps : UnsafeTypeOps
- {
- public override int Size { get { return sizeof(Int64); } }
- public override unsafe void Apply(DvInt8[] array, Action func)
- {
- fixed (DvInt8* pArray = array)
- func(new IntPtr(pArray));
- }
-
- public override void Write(DvInt8 a, BinaryWriter writer) { writer.Write(a.RawValue); }
- public override DvInt8 Read(BinaryReader reader) { return reader.ReadInt64(); }
+ public override void Write(long a, BinaryWriter writer) { writer.Write(a); }
+ public override long Read(BinaryReader reader) { return reader.ReadInt64(); }
}
private sealed class UInt64UnsafeTypeOps : UnsafeTypeOps
@@ -227,17 +171,21 @@ public override unsafe void Apply(Double[] array, Action func)
public override Double Read(BinaryReader reader) { return reader.ReadDouble(); }
}
- private sealed class DvTimeSpanUnsafeTypeOps : UnsafeTypeOps
+ private sealed class TimeSpanUnsafeTypeOps : UnsafeTypeOps
{
- public override int Size { get { return sizeof(Int64); } }
- public override unsafe void Apply(DvTimeSpan[] array, Action func)
+ public override int Size { get { return sizeof(long); } }
+ public override unsafe void Apply(TimeSpan[] array, Action func)
{
- fixed (DvTimeSpan* pArray = array)
+ fixed (TimeSpan* pArray = array)
func(new IntPtr(pArray));
}
- public override void Write(DvTimeSpan a, BinaryWriter writer) { writer.Write(a.Ticks.RawValue); }
- public override DvTimeSpan Read(BinaryReader reader) { return new DvTimeSpan(reader.ReadInt64()); }
+ public override void Write(TimeSpan a, BinaryWriter writer) { writer.Write(a.Ticks); }
+ public override TimeSpan Read(BinaryReader reader)
+ {
+ var ticks = reader.ReadInt64();
+ return new TimeSpan(ticks == long.MinValue ? default : ticks);
+ }
}
private sealed class UgUnsafeTypeOps : UnsafeTypeOps
diff --git a/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs
index d1b43dcf65..27ac28c717 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs
@@ -368,7 +368,7 @@ private sealed class Cursor : RootCursorBase, IRowCursor
private Delegate[] _getters;
private Delegate[] _subGetters; // Cached getters of the sub-cursor.
- private DvText[] _colValues; // Column values cached from the file path.
+ private ReadOnlyMemory[] _colValues; // Column values cached from the file path.
private IRowCursor _subCursor; // Sub cursor of the current file.
private IEnumerator _fileOrder;
@@ -384,7 +384,7 @@ public Cursor(IChannelProvider provider, PartitionedFileLoader parent, IMultiStr
_active = Utils.BuildArray(Schema.ColumnCount, predicate);
_subActive = _active.Take(SubColumnCount).ToArray();
- _colValues = new DvText[Schema.ColumnCount - SubColumnCount];
+ _colValues = new ReadOnlyMemory[Schema.ColumnCount - SubColumnCount];
_subGetters = new Delegate[SubColumnCount];
_getters = CreateGetters();
@@ -537,13 +537,13 @@ private void UpdateColumnValues(string path, List values)
var source = _parent._srcDirIndex[i];
if (source >= 0 && source < values.Count)
{
- _colValues[i] = new DvText(values[source]);
+ _colValues[i] = values[source].AsMemory();
}
else if (source == FilePathColIndex)
{
// Force Unix path for consistency.
var cleanPath = path.Replace(@"\", @"/");
- _colValues[i] = new DvText(cleanPath);
+ _colValues[i] = cleanPath.AsMemory();
}
}
}
@@ -602,7 +602,7 @@ private ValueGetter GetterDelegateCore(int col, ColumnType type)
Ch.Check(col >= 0 && col < _colValues.Length);
Ch.AssertValue(type);
- var conv = Conversions.Instance.GetStandardConversion(TextType.Instance, type) as ValueMapper;
+ var conv = Conversions.Instance.GetStandardConversion(TextType.Instance, type) as ValueMapper, TValue>;
if (conv == null)
{
throw Ch.Except("Invalid TValue: '{0}' of the conversion.", typeof(TValue));
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
index 391fb65739..3663c93cc4 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
@@ -511,12 +511,12 @@ private sealed class Bindings : ISchema
{
public readonly ColInfo[] Infos;
public readonly Dictionary NameToInfoIndex;
- private readonly VBuffer[] _slotNames;
+ private readonly VBuffer>[] _slotNames;
// Empty iff either header+ not set in args, or if no header present, or upon load
// there was no header stored in the model.
- private readonly DvText _header;
+ private readonly ReadOnlyMemory _header;
- private readonly MetadataUtils.MetadataGetter> _getSlotNames;
+ private readonly MetadataUtils.MetadataGetter>> _getSlotNames;
private Bindings()
{
@@ -546,7 +546,7 @@ public Bindings(TextLoader parent, Column[] cols, IMultiStreamSource headerFile,
int inputSize = parent._inputSize;
ch.Assert(0 <= inputSize & inputSize < SrcLim);
- List lines = null;
+ List> lines = null;
if (headerFile != null)
Cursor.GetSomeLines(headerFile, 1, ref lines);
if (needInputSize && inputSize == 0)
@@ -712,11 +712,11 @@ public Bindings(TextLoader parent, Column[] cols, IMultiStreamSource headerFile,
Infos[iinfoOther] = ColInfo.Create(cols[iinfoOther].Name.Trim(), typeOther, segsNew.ToArray(), true);
}
- _slotNames = new VBuffer[Infos.Length];
+ _slotNames = new VBuffer>[Infos.Length];
if ((parent.HasHeader || headerFile != null) && Utils.Size(lines) > 0)
_header = lines[0];
- if (_header.HasChars)
+ if (!_header.IsEmpty)
Parser.ParseSlotNames(parent, _header, Infos, _slotNames);
ch.Done();
@@ -797,12 +797,12 @@ public Bindings(ModelLoadContext ctx, TextLoader parent)
NameToInfoIndex[name] = iinfo;
}
- _slotNames = new VBuffer[Infos.Length];
+ _slotNames = new VBuffer>[Infos.Length];
string result = null;
ctx.TryLoadTextStream("Header.txt", reader => result = reader.ReadLine());
if (!string.IsNullOrEmpty(result))
- Parser.ParseSlotNames(parent, _header = new DvText(result), Infos, _slotNames);
+ Parser.ParseSlotNames(parent, _header = result.AsMemory(), Infos, _slotNames);
}
public void Save(ModelSaveContext ctx)
@@ -850,7 +850,7 @@ public void Save(ModelSaveContext ctx)
}
// Save header in an easily human inspectable separate entry.
- if (_header.HasChars)
+ if (!_header.IsEmpty)
ctx.SaveTextStream("Header.txt", writer => writer.WriteLine(_header.ToString()));
}
@@ -924,7 +924,7 @@ public void GetMetadata(string kind, int col, ref TValue value)
}
}
- private void GetSlotNames(int col, ref VBuffer dst)
+ private void GetSlotNames(int col, ref VBuffer> dst)
{
Contracts.Assert(0 <= col && col < ColumnCount);
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderCursor.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderCursor.cs
index 19b6d640cc..b23637b1a9 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderCursor.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderCursor.cs
@@ -212,7 +212,7 @@ public override ValueGetter GetIdGetter()
};
}
- public static void GetSomeLines(IMultiStreamSource source, int count, ref List lines)
+ public static void GetSomeLines(IMultiStreamSource source, int count, ref List> lines)
{
Contracts.AssertValue(source);
Contracts.Assert(count > 0);
@@ -236,7 +236,7 @@ public static void GetSomeLines(IMultiStreamSource source, int count, ref List
@@ -495,7 +495,7 @@ private void ThreadProc()
for (; ; )
{
// REVIEW: Avoid allocating a string for every line. This would probably require
- // introducing a CharSpan type (similar to DvText but based on char[] or StringBuilder)
+ // introducing a CharSpan type (similar to ReadOnlyMemory but based on char[] or StringBuilder)
// and implementing all the necessary conversion functionality on it. See task 3871.
text = rdr.ReadLine();
if (text == null)
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs
index c0d0f25b17..0d4331c59b 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs
@@ -228,7 +228,7 @@ protected ColumnPipe(RowSet rows)
public abstract void Reset(int irow, int size);
// Passed by-ref for effeciency, not so it can be modified.
- public abstract bool Consume(int irow, int index, ref DvText text);
+ public abstract bool Consume(int irow, int index, ref ReadOnlyMemory text);
public abstract Delegate GetGetter();
}
@@ -255,7 +255,7 @@ public override void Reset(int irow, int size)
_values[irow] = default(TResult);
}
- public override bool Consume(int irow, int index, ref DvText text)
+ public override bool Consume(int irow, int index, ref ReadOnlyMemory text)
{
Contracts.Assert(0 <= irow && irow < _values.Length);
Contracts.Assert(index == 0);
@@ -332,7 +332,7 @@ public void Reset(int size)
AssertValid();
}
- public bool Consume(int index, ref DvText text)
+ public bool Consume(int index, ref ReadOnlyMemory text)
{
AssertValid();
Contracts.Assert(_indexPrev < index & index < _size);
@@ -439,7 +439,7 @@ public override void Reset(int irow, int size)
_values[irow].Reset(size);
}
- public override bool Consume(int irow, int index, ref DvText text)
+ public override bool Consume(int irow, int index, ref ReadOnlyMemory text)
{
Contracts.Assert(0 <= irow && irow < _values.Length);
return _values[irow].Consume(index, ref text);
@@ -510,7 +510,7 @@ private struct ScanInfo
///
/// The current text for the entire line (all fields), and possibly more.
///
- public readonly string TextBuf;
+ public ReadOnlyMemory TextBuf;
///
/// The min position in to consider (all fields).
@@ -531,7 +531,7 @@ private struct ScanInfo
///
/// The (unquoted) text of the field.
///
- public DvText Span;
+ public ReadOnlyMemory Span;
///
/// Whether there was a quoting error in the field.
@@ -558,16 +558,17 @@ private struct ScanInfo
///
/// Initializes the ScanInfo.
///
- public ScanInfo(ref DvText text, string path, long line)
+ public ScanInfo(ref ReadOnlyMemory text, string path, long line)
: this()
{
- Contracts.Assert(!text.IsNA);
Contracts.AssertValueOrNull(path);
Contracts.Assert(line >= 0);
Path = path;
Line = line;
- TextBuf = text.GetRawUnderlyingBufferInfo(out IchMinBuf, out IchLimBuf);
+ TextBuf = text;
+ IchMinBuf = 0;
+ IchLimBuf = text.Length;
IchMinNext = IchMinBuf;
}
}
@@ -584,13 +585,13 @@ private sealed class FieldSet
// Source indices and associated text (parallel arrays).
public int[] Indices;
- public DvText[] Spans;
+ public ReadOnlyMemory[] Spans;
public FieldSet()
{
// Always allocate/size Columns after Spans so even if exceptions are thrown we
// are guaranteed that Spans.Length >= Columns.Length.
- Spans = new DvText[8];
+ Spans = new ReadOnlyMemory[8];
Indices = new int[8];
}
@@ -687,7 +688,7 @@ public Parser(TextLoader parent)
Contracts.Assert(_inputSize >= 0);
}
- public static void GetInputSize(TextLoader parent, List lines, out int minSize, out int maxSize)
+ public static void GetInputSize(TextLoader parent, List> lines, out int minSize, out int maxSize)
{
Contracts.AssertNonEmpty(lines);
Contracts.Assert(parent._inputSize == 0, "Why is this being called when inputSize is known?");
@@ -700,12 +701,12 @@ public static void GetInputSize(TextLoader parent, List lines, out int m
{
foreach (var line in lines)
{
- var text = (parent._flags & Options.TrimWhitespace) != 0 ? line.TrimEndWhiteSpace() : line;
- if (!text.HasChars)
+ var text = (parent._flags & Options.TrimWhitespace) != 0 ? ReadOnlyMemoryUtils.TrimEndWhiteSpace(line) : line;
+ if (text.IsEmpty)
continue;
// REVIEW: This is doing more work than we need, but makes sure we're consistent....
- int srcLim = impl.GatherFields(text);
+ int srcLim = impl.GatherFields(text, text.Span);
// Don't need the fields, just srcLim.
impl.Fields.Clear();
@@ -724,9 +725,9 @@ public static void GetInputSize(TextLoader parent, List lines, out int m
}
}
- public static void ParseSlotNames(TextLoader parent, DvText textHeader, ColInfo[] infos, VBuffer[] slotNames)
+ public static void ParseSlotNames(TextLoader parent, ReadOnlyMemory textHeader, ColInfo[] infos, VBuffer>[] slotNames)
{
- Contracts.Assert(textHeader.HasChars);
+ Contracts.Assert(!textHeader.IsEmpty);
Contracts.Assert(infos.Length == slotNames.Length);
var sb = new StringBuilder();
@@ -734,7 +735,7 @@ public static void ParseSlotNames(TextLoader parent, DvText textHeader, ColInfo[
var impl = new HelperImpl(stats, parent._flags, parent._separators, parent._inputSize, int.MaxValue);
try
{
- impl.GatherFields(textHeader);
+ impl.GatherFields(textHeader, textHeader.Span);
}
finally
{
@@ -742,7 +743,7 @@ public static void ParseSlotNames(TextLoader parent, DvText textHeader, ColInfo[
}
var header = impl.Fields;
- var bldr = BufferBuilder.CreateDefault();
+ var bldr = BufferBuilder>.CreateDefault();
for (int iinfo = 0; iinfo < infos.Length; iinfo++)
{
var info = infos[iinfo];
@@ -771,7 +772,7 @@ public static void ParseSlotNames(TextLoader parent, DvText textHeader, ColInfo[
{
var srcCur = header.Indices[isrc];
Contracts.Assert(min <= srcCur & srcCur < lim);
- bldr.AddFeature(indexBase + srcCur, header.Spans[isrc].TrimWhiteSpace());
+ bldr.AddFeature(indexBase + srcCur, ReadOnlyMemoryUtils.TrimWhiteSpace(header.Spans[isrc]));
}
}
ivDst += sizeSeg;
@@ -795,6 +796,24 @@ public RowSet CreateRowSet(ParseStats stats, int count, bool[] active)
return rows;
}
+ /// <summary>
+ /// Returns a <see cref="ReadOnlyMemory{T}"/> of <see cref="char"/> with trailing whitespace trimmed.
+ /// </summary>
+ private ReadOnlyMemory TrimEndWhiteSpace(ReadOnlyMemory memory, ReadOnlySpan span)
+ {
+ if (memory.IsEmpty)
+ return memory;
+
+ int ichLim = memory.Length;
+ if (!char.IsWhiteSpace(span[ichLim - 1]))
+ return memory;
+
+ while (0 < ichLim && char.IsWhiteSpace(span[ichLim - 1]))
+ ichLim--;
+
+ return memory.Slice(0, ichLim);
+ }
+
public void ParseRow(RowSet rows, int irow, Helper helper, bool[] active, string path, long line, string text)
{
Contracts.AssertValue(rows);
@@ -803,13 +822,14 @@ public void ParseRow(RowSet rows, int irow, Helper helper, bool[] active, string
Contracts.Assert(active == null | Utils.Size(active) == _infos.Length);
var impl = (HelperImpl)helper;
- DvText lineSpan = new DvText(text);
+ var lineSpan = text.AsMemory();
+ var span = lineSpan.Span;
if ((_flags & Options.TrimWhitespace) != 0)
- lineSpan = lineSpan.TrimEndWhiteSpace();
+ lineSpan = TrimEndWhiteSpace(lineSpan, span);
try
{
// Parse the spans into items, ensuring that sparse don't precede non-sparse.
- int srcLim = impl.GatherFields(lineSpan, path, line);
+ int srcLim = impl.GatherFields(lineSpan, span, path, line);
impl.Fields.AssertValid();
// REVIEW: When should we report inconsistency?
@@ -855,7 +875,7 @@ private sealed class HelperImpl : Helper
private readonly StringBuilder _sb;
// Result of a blank field - either Missing or Empty, depending on _quoting.
- private readonly DvText _blank;
+ private readonly ReadOnlyMemory _blank;
public readonly FieldSet Fields;
@@ -878,7 +898,7 @@ public HelperImpl(ParseStats stats, Options flags, char[] seps, int inputSize, i
_quoting = (flags & Options.AllowQuoting) != 0;
_sparse = (flags & Options.AllowSparse) != 0;
_sb = new StringBuilder();
- _blank = _quoting ? DvText.NA : DvText.Empty;
+ _blank = ReadOnlyMemory.Empty;
Fields = new FieldSet();
}
@@ -902,7 +922,7 @@ private bool IsSep(char ch)
/// Process the line of text into fields, stored in the Fields field. Ensures that sparse
/// don't precede non-sparse. Returns the lim of the src columns.
///
- public int GatherFields(DvText lineSpan, string path = null, long line = 0)
+ public int GatherFields(ReadOnlyMemory lineSpan, ReadOnlySpan span, string path = null, long line = 0)
{
Fields.AssertEmpty();
@@ -915,7 +935,7 @@ public int GatherFields(DvText lineSpan, string path = null, long line = 0)
for (; ; )
{
Contracts.Assert(scan.IchMinBuf <= scan.IchMinNext && scan.IchMinNext <= scan.IchLimBuf);
- bool more = FetchNextField(ref scan);
+ bool more = FetchNextField(ref scan, span);
Contracts.Assert(scan.IchMinBuf <= scan.IchMinNext && scan.IchMinNext <= scan.IchLimBuf);
Contracts.Assert(scan.Index == -1);
@@ -946,7 +966,7 @@ public int GatherFields(DvText lineSpan, string path = null, long line = 0)
for (; ; )
{
Contracts.Assert(scan.IchMinBuf <= scan.IchMinNext && scan.IchMinNext <= scan.IchLimBuf);
- bool more = FetchNextField(ref scan);
+ bool more = FetchNextField(ref scan, span);
Contracts.Assert(scan.IchMinBuf <= scan.IchMinNext && scan.IchMinNext <= scan.IchLimBuf);
Contracts.Assert(scan.Index >= -1);
@@ -992,16 +1012,24 @@ public int GatherFields(DvText lineSpan, string path = null, long line = 0)
}
var spanT = Fields.Spans[Fields.Count - 1];
- // Note that Convert produces NA if the text is unparsable.
- DvInt4 csrc = default(DvInt4);
- Conversion.Conversions.Instance.Convert(ref spanT, ref csrc);
- csrcSparse = csrc.RawValue;
- if (csrcSparse <= 0)
+ // Note that Convert throws an exception if the text is unparsable.
+ int csrc = default;
+ try
+ {
+ Conversions.Instance.Convert(ref spanT, ref csrc);
+ }
+ catch
+ {
+ Contracts.Assert(csrc == default);
+ }
+
+ if (csrc <= 0)
{
_stats.LogBadFmt(ref scan, "Bad dimensionality or ambiguous sparse item. Use sparse=- for non-sparse file, and/or quote the value.");
break;
}
+ csrcSparse = csrc;
srcLimFixed = Fields.Indices[--Fields.Count];
if (csrcSparse >= SrcLim - srcLimFixed)
csrcSparse = SrcLim - srcLimFixed - 1;
@@ -1065,18 +1093,17 @@ public int GatherFields(DvText lineSpan, string path = null, long line = 0)
return inputSize;
}
- private bool FetchNextField(ref ScanInfo scan)
+ private bool FetchNextField(ref ScanInfo scan, ReadOnlySpan span)
{
Contracts.Assert(scan.IchMinBuf <= scan.IchMinNext && scan.IchMinNext <= scan.IchLimBuf);
var text = scan.TextBuf;
int ichLim = scan.IchLimBuf;
int ichCur = scan.IchMinNext;
-
if (!_sepContainsSpace)
{
// Ignore leading spaces
- while (ichCur < ichLim && text[ichCur] == ' ')
+ while (ichCur < ichLim && span[ichCur] == ' ')
ichCur++;
}
@@ -1093,29 +1120,29 @@ private bool FetchNextField(ref ScanInfo scan)
}
int ichMinRaw = ichCur;
- if (_sparse && (uint)(text[ichCur] - '0') <= 9)
+ if (_sparse && (uint)(span[ichCur] - '0') <= 9)
{
// See if it is sparse. Avoid overflow by limiting the index to 9 digits.
// REVIEW: This limits the src index to a billion. Is this acceptable?
int ichEnd = Math.Min(ichLim, ichCur + 9);
int ichCol = ichCur + 1;
Contracts.Assert(ichCol <= ichEnd);
- while (ichCol < ichEnd && (uint)(text[ichCol] - '0') <= 9)
+ while (ichCol < ichEnd && (uint)(span[ichCol] - '0') <= 9)
ichCol++;
- if (ichCol < ichLim && text[ichCol] == ':')
+ if (ichCol < ichLim && span[ichCol] == ':')
{
// It is sparse. Compute the index.
int ind = 0;
for (int ich = ichCur; ich < ichCol; ich++)
- ind = ind * 10 + (text[ich] - '0');
+ ind = ind * 10 + (span[ich] - '0');
ichCur = ichCol + 1;
scan.Index = ind;
// Skip spaces again.
if (!_sepContainsSpace)
{
- while (ichCur < ichLim && text[ichCur] == ' ')
+ while (ichCur < ichLim && span[ichCur] == ' ')
ichCur++;
}
@@ -1129,7 +1156,7 @@ private bool FetchNextField(ref ScanInfo scan)
}
Contracts.Assert(ichCur < ichLim);
- if (text[ichCur] == '"' && _quoting)
+ if (span[ichCur] == '"' && _quoting)
{
// Quoted case.
ichCur++;
@@ -1144,13 +1171,13 @@ private bool FetchNextField(ref ScanInfo scan)
scan.QuotingError = true;
break;
}
- if (text[ichCur] == '"')
+ if (span[ichCur] == '"')
{
if (ichCur > ichRun)
- _sb.Append(text, ichRun, ichCur - ichRun);
+ _sb.AppendSpan(span.Slice(ichRun, ichCur - ichRun));
if (++ichCur >= ichLim)
break;
- if (text[ichCur] != '"')
+ if (span[ichCur] != '"')
break;
ichRun = ichCur;
}
@@ -1159,7 +1186,7 @@ private bool FetchNextField(ref ScanInfo scan)
// Ignore any spaces between here and the next separator. Anything else is a formatting "error".
for (; ichCur < ichLim; ichCur++)
{
- if (text[ichCur] == ' ')
+ if (span[ichCur] == ' ')
{
// End the loop if space is a sep, otherwise ignore this space.
if (_sepContainsSpace)
@@ -1168,18 +1195,16 @@ private bool FetchNextField(ref ScanInfo scan)
else
{
// End the loop if this nonspace char is a sep, otherwise it is an error.
- if (IsSep(text[ichCur]))
+ if (IsSep(span[ichCur]))
break;
scan.QuotingError = true;
}
}
- if (scan.QuotingError)
- scan.Span = DvText.NA;
- else if (_sb.Length == 0)
- scan.Span = DvText.Empty;
+ if (scan.QuotingError || _sb.Length == 0)
+ scan.Span = String.Empty.AsMemory();
else
- scan.Span = new DvText(_sb.ToString());
+ scan.Span = _sb.ToString().AsMemory();
}
else
{
@@ -1193,7 +1218,7 @@ private bool FetchNextField(ref ScanInfo scan)
Contracts.Assert(ichCur <= ichLim);
if (ichCur >= ichLim)
break;
- if (_sep0 == text[ichCur])
+ if (_sep0 == span[ichCur])
break;
}
}
@@ -1204,7 +1229,7 @@ private bool FetchNextField(ref ScanInfo scan)
Contracts.Assert(ichCur <= ichLim);
if (ichCur >= ichLim)
break;
- if (_sep0 == text[ichCur] || _sep1 == text[ichCur])
+ if (_sep0 == span[ichCur] || _sep1 == span[ichCur])
break;
}
}
@@ -1215,7 +1240,7 @@ private bool FetchNextField(ref ScanInfo scan)
Contracts.Assert(ichCur <= ichLim);
if (ichCur >= ichLim)
break;
- if (IsSep(text[ichCur]))
+ if (IsSep(span[ichCur]))
break;
}
}
@@ -1223,7 +1248,7 @@ private bool FetchNextField(ref ScanInfo scan)
if (ichMin >= ichCur)
scan.Span = _blank;
else
- scan.Span = new DvText(text, ichMin, ichCur);
+ scan.Span = text.Slice(ichMin, ichCur - ichMin);
}
scan.IchLim = ichCur;
@@ -1233,7 +1258,7 @@ private bool FetchNextField(ref ScanInfo scan)
return false;
}
- Contracts.Assert(_seps.Contains(text[ichCur]));
+ Contracts.Assert(_seps.Contains(span[ichCur]));
scan.IchMinNext = ichCur + 1;
return true;
}
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs
index 48f3f9ddc3..64cf7f7faf 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs
@@ -94,22 +94,22 @@ protected ValueWriterBase(PrimitiveType type, int source, char sep)
if (type.IsText)
{
// For text we need to deal with escaping.
- ValueMapper c = MapText;
+ ValueMapper, StringBuilder> c = MapText;
Conv = (ValueMapper)(Delegate)c;
}
else if (type.IsTimeSpan)
{
- ValueMapper c = MapTimeSpan;
+ ValueMapper c = MapTimeSpan;
Conv = (ValueMapper)(Delegate)c;
}
else if (type.IsDateTime)
{
- ValueMapper c = MapDateTime;
+ ValueMapper c = MapDateTime;
Conv = (ValueMapper)(Delegate)c;
}
else if (type.IsDateTimeZone)
{
- ValueMapper c = MapDateTimeZone;
+ ValueMapper c = MapDateTimeZone;
Conv = (ValueMapper)(Delegate)c;
}
else
@@ -120,22 +120,22 @@ protected ValueWriterBase(PrimitiveType type, int source, char sep)
Default = Sb.ToString();
}
- protected void MapText(ref DvText src, ref StringBuilder sb)
+ protected void MapText(ref ReadOnlyMemory src, ref StringBuilder sb)
{
- TextSaverUtils.MapText(ref src, ref sb, Sep);
+ TextSaverUtils.MapText(src.Span, ref sb, Sep);
}
- protected void MapTimeSpan(ref DvTimeSpan src, ref StringBuilder sb)
+ protected void MapTimeSpan(ref TimeSpan src, ref StringBuilder sb)
{
TextSaverUtils.MapTimeSpan(ref src, ref sb);
}
- protected void MapDateTime(ref DvDateTime src, ref StringBuilder sb)
+ protected void MapDateTime(ref DateTime src, ref StringBuilder sb)
{
TextSaverUtils.MapDateTime(ref src, ref sb);
}
- protected void MapDateTimeZone(ref DvDateTimeZone src, ref StringBuilder sb)
+ protected void MapDateTimeZone(ref DateTimeOffset src, ref StringBuilder sb)
{
TextSaverUtils.MapDateTimeZone(ref src, ref sb);
}
@@ -145,7 +145,7 @@ private sealed class VecValueWriter : ValueWriterBase
{
private readonly ValueGetter> _getSrc;
private VBuffer _src;
- private readonly VBuffer _slotNames;
+ private readonly VBuffer> _slotNames;
private readonly int _slotCount;
public VecValueWriter(IRowCursor cursor, VectorType type, int source, char sep)
@@ -225,7 +225,7 @@ public override void WriteData(Action appendItem, out int le
public override void WriteHeader(Action appendItem, out int length)
{
- var span = new DvText(_columnName);
+ var span = _columnName.AsMemory();
MapText(ref span, ref Sb);
appendItem(Sb, 0);
length = 1;
@@ -796,29 +796,28 @@ private void WriteDenseTo(int dstLim, string defaultStr = null)
internal static class TextSaverUtils
{
/// <summary>
- /// Converts a DvText to a StringBuilder using TextSaver escaping and string quoting rules.
+ /// Converts a ReadOnlySpan to a StringBuilder using TextSaver escaping and string quoting rules.
/// </summary>
- internal static void MapText(ref DvText src, ref StringBuilder sb, char sep)
+ internal static void MapText(ReadOnlySpan span, ref StringBuilder sb, char sep)
{
if (sb == null)
sb = new StringBuilder();
else
sb.Clear();
- if (src.IsEmpty)
+ if (span.IsEmpty)
sb.Append("\"\"");
- else if (!src.IsNA)
+ else
{
- int ichMin;
- int ichLim;
- string text = src.GetRawUnderlyingBufferInfo(out ichMin, out ichLim);
+ int ichMin = 0;
+ int ichLim = span.Length;
int ichCur = ichMin;
int ichRun = ichCur;
bool quoted = false;
// Strings that start with space need to be quoted.
Contracts.Assert(ichCur < ichLim);
- if (text[ichCur] == ' ')
+ if (span[ichCur] == ' ')
{
quoted = true;
sb.Append('"');
@@ -826,7 +825,7 @@ internal static void MapText(ref DvText src, ref StringBuilder sb, char sep)
for (; ichCur < ichLim; ichCur++)
{
- char ch = text[ichCur];
+ char ch = span[ichCur];
if (ch != '"' && ch != sep && ch != ':')
continue;
if (!quoted)
@@ -838,47 +837,47 @@ internal static void MapText(ref DvText src, ref StringBuilder sb, char sep)
if (ch == '"')
{
if (ichRun < ichCur)
- sb.Append(text, ichRun, ichCur - ichRun);
+ sb.AppendSpan(span.Slice(ichRun, ichCur - ichRun));
sb.Append("\"\"");
ichRun = ichCur + 1;
}
}
Contracts.Assert(ichCur == ichLim);
if (ichRun < ichCur)
- sb.Append(text, ichRun, ichCur - ichRun);
+ sb.AppendSpan(span.Slice(ichRun, ichCur - ichRun));
if (quoted)
sb.Append('"');
}
}
- internal static void MapTimeSpan(ref DvTimeSpan src, ref StringBuilder sb)
+ internal static void MapTimeSpan(ref TimeSpan src, ref StringBuilder sb)
{
if (sb == null)
sb = new StringBuilder();
else
sb.Clear();
- if (!src.IsNA)
- sb.AppendFormat("\"{0:c}\"", (TimeSpan)src);
+
+ sb.AppendFormat("\"{0:c}\"", src);
}
- internal static void MapDateTime(ref DvDateTime src, ref StringBuilder sb)
+ internal static void MapDateTime(ref DateTime src, ref StringBuilder sb)
{
if (sb == null)
sb = new StringBuilder();
else
sb.Clear();
- if (!src.IsNA)
- sb.AppendFormat("\"{0:o}\"", (DateTime)src);
+
+ sb.AppendFormat("\"{0:o}\"", src);
}
- internal static void MapDateTimeZone(ref DvDateTimeZone src, ref StringBuilder sb)
+ internal static void MapDateTimeZone(ref DateTimeOffset src, ref StringBuilder sb)
{
if (sb == null)
sb = new StringBuilder();
else
sb.Clear();
- if (!src.IsNA)
- sb.AppendFormat("\"{0:o}\"", (DateTimeOffset)src);
+
+ sb.AppendFormat("\"{0:o}\"", src);
}
}
}
diff --git a/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs b/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs
index 098c652203..98b0dba355 100644
--- a/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs
+++ b/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs
@@ -21,8 +21,8 @@ public sealed class ArrayDataViewBuilder
private readonly IHost _host;
private readonly List _columns;
private readonly List _names;
- private readonly Dictionary>> _getSlotNames;
- private readonly Dictionary>> _getKeyValues;
+ private readonly Dictionary>>> _getSlotNames;
+ private readonly Dictionary>>> _getKeyValues;
private int? RowCount
{
@@ -41,8 +41,8 @@ public ArrayDataViewBuilder(IHostEnvironment env)
_columns = new List();
_names = new List();
- _getSlotNames = new Dictionary>>();
- _getKeyValues = new Dictionary>>();
+ _getSlotNames = new Dictionary