|
1 |
| -using Float = System.Single; |
| 1 | +// Licensed to the .NET Foundation under one or more agreements. |
| 2 | +// The .NET Foundation licenses this file to you under the MIT license. |
| 3 | +// See the LICENSE file in the project root for more information. |
2 | 4 |
|
3 | 5 | using System;
|
4 |
| -using System.Collections.Concurrent; |
5 | 6 | using System.Collections.Generic;
|
6 |
| -using System.IO; |
7 | 7 | using System.Linq;
|
8 |
| -using System.Text; |
9 |
| -using System.Threading; |
10 |
| -using Microsoft.ML.Runtime.Internal.Utilities; |
11 |
| -using Microsoft.ML.Data.StaticPipe.Runtime; |
12 | 8 | using Microsoft.ML.Core.Data;
|
| 9 | +using Microsoft.ML.Data.StaticPipe.Runtime; |
| 10 | +using Microsoft.ML.Data.StaticPipe; |
| 11 | +using Microsoft.ML.Runtime.Internal.Utilities; |
13 | 12 |
|
14 | 13 | namespace Microsoft.ML.Runtime.Data
|
15 | 14 | {
|
16 | 15 | public sealed partial class TextLoader
|
17 | 16 | {
|
18 |
| - private sealed class TextReconciler : ReaderReconciler<IMultiStreamSource> |
| 17 | + public static DataReader<IMultiStreamSource, TTupleShape> CreateReader<TTupleShape>( |
| 18 | + IHostEnvironment env, Func<Context, TTupleShape> func, IMultiStreamSource files = null, |
| 19 | + bool hasHeader = false, char separator = '\t', bool allowQuoting = true, bool allowSparse = true, |
| 20 | + bool trimWhitspace = false) |
19 | 21 | {
|
20 |
| - public static readonly TextReconciler Inst = new TextReconciler(); |
| 22 | + Contracts.CheckValue(env, nameof(env)); |
| 23 | + env.CheckValue(func, nameof(func)); |
| 24 | + env.CheckValueOrNull(files); |
21 | 25 |
|
22 |
| - public override IDataReaderEstimator<IMultiStreamSource, IDataReader<IMultiStreamSource>> Reconcile( |
23 |
| - PipelineColumn[] toOutput, Dictionary<PipelineColumn, string> outputNames) |
| 26 | + // Populate all args except the columns. |
| 27 | + var args = new Arguments(); |
| 28 | + args.AllowQuoting = allowQuoting; |
| 29 | + args.AllowSparse = allowSparse; |
| 30 | + args.HasHeader = hasHeader; |
| 31 | + args.SeparatorChars = new[] { separator }; |
| 32 | + args.TrimWhitespace = trimWhitspace; |
| 33 | + |
| 34 | + var rec = new TextReconciler(args, files); |
| 35 | + var ctx = new Context(rec); |
| 36 | + |
| 37 | + using (var ch = env.Start("Initializing " + nameof(TextLoader))) |
24 | 38 | {
|
25 |
| - //return new FakeReaderEstimator<IMultiStreamSource>(); |
26 |
| - return null; |
| 39 | + var readerEst = StaticPipeUtils.ReaderEstimatorAnalyzerHelper(env, ch, ctx, rec, func); |
| 40 | + Contracts.AssertValue(readerEst); |
| 41 | + var reader = readerEst.Fit(files); |
| 42 | + ch.Done(); |
| 43 | + return reader; |
27 | 44 | }
|
28 | 45 | }
|
29 | 46 |
|
30 |
| - public sealed class Context |
| 47 | + private sealed class TextReconciler : ReaderReconciler<IMultiStreamSource> |
31 | 48 | {
|
32 |
| - private class MyScalar<T> : Scalar<T> |
| 49 | + private readonly Arguments _args; |
| 50 | + private readonly IMultiStreamSource _files; |
| 51 | + |
| 52 | + public TextReconciler(Arguments args, IMultiStreamSource files) |
33 | 53 | {
|
34 |
| - public readonly int Ordinal; |
| 54 | + Contracts.AssertValue(args); |
| 55 | + Contracts.AssertValueOrNull(files); |
35 | 56 |
|
36 |
| - public MyScalar(int ordinal) |
37 |
| - : base(TextReconciler.Inst, null) |
38 |
| - { |
39 |
| - Ordinal = ordinal; |
40 |
| - } |
| 57 | + _args = args; |
| 58 | + _files = files; |
41 | 59 | }
|
42 | 60 |
|
43 |
| - private class MyVector<T> : Vector<T> |
| 61 | + public override IDataReaderEstimator<IMultiStreamSource, IDataReader<IMultiStreamSource>> Reconcile( |
| 62 | + IHostEnvironment env, PipelineColumn[] toOutput, IReadOnlyDictionary<PipelineColumn, string> outputNames) |
44 | 63 | {
|
45 |
| - public readonly int? Min; |
46 |
| - public readonly int? Max; |
| 64 | + Contracts.AssertValue(env); |
| 65 | + Contracts.AssertValue(toOutput); |
| 66 | + Contracts.AssertValue(outputNames); |
| 67 | + Contracts.Assert(_args.Column == null); |
47 | 68 |
|
48 |
| - public MyVector(int? min, int? max) |
49 |
| - : base(TextReconciler.Inst, null) |
| 69 | + Column Create(PipelineColumn pipelineCol) |
50 | 70 | {
|
51 |
| - Min = min; |
52 |
| - Max = max; |
| 71 | + var pipelineArgCol = (IPipelineArgColumn)pipelineCol; |
| 72 | + var argCol = pipelineArgCol.Create(); |
| 73 | + argCol.Name = outputNames[pipelineCol]; |
| 74 | + return argCol; |
53 | 75 | }
|
| 76 | + |
| 77 | + var cols = _args.Column = new Column[toOutput.Length]; |
| 78 | + for (int i = 0; i < toOutput.Length; ++i) |
| 79 | + cols[i] = Create(toOutput[i]); |
| 80 | + |
| 81 | + var orig = new TextLoader(env, _args, _files); |
| 82 | + return new TrivialReaderEstimator<IMultiStreamSource, TextLoader>(orig); |
54 | 83 | }
|
| 84 | + } |
55 | 85 |
|
56 |
| - public Scalar<bool> LoadBool(int ordinal) => Load<bool>(ordinal); |
57 |
| - public Vector<bool> LoadBool(int minOrdinal, int? maxOrdinal) => Load<bool>(minOrdinal, maxOrdinal); |
58 |
| - public Scalar<float> LoadFloat(int ordinal) => Load<float>(ordinal); |
59 |
| - public Vector<float> LoadFloat(int minOrdinal, int? maxOrdinal) => Load<float>(minOrdinal, maxOrdinal); |
60 |
| - public Scalar<double> LoadDouble(int ordinal) => Load<double>(ordinal); |
61 |
| - public Vector<double> LoadDouble(int minOrdinal, int? maxOrdinal) => Load<double>(minOrdinal, maxOrdinal); |
62 |
| - public Scalar<string> LoadText(int ordinal) => Load<string>(ordinal); |
63 |
| - public Vector<string> LoadText(int minOrdinal, int? maxOrdinal) => Load<string>(minOrdinal, maxOrdinal); |
| 86 | + private interface IPipelineArgColumn |
| 87 | + { |
| 88 | + /// <summary> |
| 89 | + /// Creates a <see cref="Column"/> object corresponding to the <see cref="PipelineColumn"/>, with everything |
| 90 | + /// filled in except <see cref="ColInfo.Name"/>. |
| 91 | + /// </summary> |
| 92 | + Column Create(); |
| 93 | + } |
64 | 94 |
|
65 |
| - private Scalar<T> Load<T>(int ordinal) |
| 95 | + public sealed class Context |
| 96 | + { |
| 97 | + private readonly Reconciler _rec; |
| 98 | + |
| 99 | + internal Context(Reconciler rec) |
| 100 | + { |
| 101 | + Contracts.AssertValue(rec); |
| 102 | + _rec = rec; |
| 103 | + } |
| 104 | + |
| 105 | + public Scalar<bool> LoadBool(int ordinal) => Load<bool>(DataKind.BL, ordinal); |
| 106 | + public Vector<bool> LoadBool(int minOrdinal, int? maxOrdinal) => Load<bool>(DataKind.BL, minOrdinal, maxOrdinal); |
| 107 | + public Scalar<float> LoadFloat(int ordinal) => Load<float>(DataKind.R4, ordinal); |
| 108 | + public Vector<float> LoadFloat(int minOrdinal, int? maxOrdinal) => Load<float>(DataKind.R4, minOrdinal, maxOrdinal); |
| 109 | + public Scalar<double> LoadDouble(int ordinal) => Load<double>(DataKind.R8, ordinal); |
| 110 | + public Vector<double> LoadDouble(int minOrdinal, int? maxOrdinal) => Load<double>(DataKind.R8, minOrdinal, maxOrdinal); |
| 111 | + public Scalar<string> LoadText(int ordinal) => Load<string>(DataKind.TX, ordinal); |
| 112 | + public Vector<string> LoadText(int minOrdinal, int? maxOrdinal) => Load<string>(DataKind.TX, minOrdinal, maxOrdinal); |
| 113 | + |
| 114 | + private Scalar<T> Load<T>(DataKind kind, int ordinal) |
66 | 115 | {
|
67 | 116 | Contracts.CheckParam(ordinal >= 0, nameof(ordinal), "Should be non-negative");
|
68 |
| - return new MyScalar<T>(ordinal); |
| 117 | + return new MyScalar<T>(_rec, kind, ordinal); |
69 | 118 | }
|
70 | 119 |
|
71 |
| - private Vector<T> Load<T>(int minOrdinal, int? maxOrdinal) |
| 120 | + private Vector<T> Load<T>(DataKind kind, int minOrdinal, int? maxOrdinal) |
72 | 121 | {
|
73 | 122 | Contracts.CheckParam(minOrdinal >= 0, nameof(minOrdinal), "Should be non-negative");
|
74 | 123 | var v = maxOrdinal >= minOrdinal;
|
75 | 124 | Contracts.CheckParam(!(maxOrdinal < minOrdinal), nameof(maxOrdinal), "If specified, cannot be less than " + nameof(minOrdinal));
|
76 |
| - return new MyVector<T>(minOrdinal, maxOrdinal); |
| 125 | + return new MyVector<T>(_rec, kind, minOrdinal, maxOrdinal); |
| 126 | + } |
| 127 | + |
| 128 | + private class MyScalar<T> : Scalar<T>, IPipelineArgColumn |
| 129 | + { |
| 130 | + private readonly DataKind _kind; |
| 131 | + private readonly int _ordinal; |
| 132 | + |
| 133 | + public MyScalar(Reconciler rec, DataKind kind, int ordinal) |
| 134 | + : base(rec, null) |
| 135 | + { |
| 136 | + _kind = kind; |
| 137 | + _ordinal = ordinal; |
| 138 | + } |
| 139 | + |
| 140 | + public Column Create() |
| 141 | + { |
| 142 | + return new Column() |
| 143 | + { |
| 144 | + Type = _kind, |
| 145 | + Source = new[] { new Range(_ordinal) }, |
| 146 | + }; |
| 147 | + } |
| 148 | + } |
| 149 | + |
| 150 | + private class MyVector<T> : Vector<T>, IPipelineArgColumn |
| 151 | + { |
| 152 | + private readonly DataKind _kind; |
| 153 | + private readonly int _min; |
| 154 | + private readonly int? _max; |
| 155 | + |
| 156 | + public MyVector(Reconciler rec, DataKind kind, int min, int? max) |
| 157 | + : base(rec, null) |
| 158 | + { |
| 159 | + _kind = kind; |
| 160 | + _min = min; |
| 161 | + _max = max; |
| 162 | + } |
| 163 | + |
| 164 | + public Column Create() |
| 165 | + { |
| 166 | + return new Column() |
| 167 | + { |
| 168 | + Type = _kind, |
| 169 | + Source = new[] { new Range(_min, _max) }, |
| 170 | + }; |
| 171 | + } |
77 | 172 | }
|
78 | 173 | }
|
79 | 174 | }
|
80 | 175 | }
|
| 176 | + |
0 commit comments