|
| 1 | +// Licensed to the .NET Foundation under one or more agreements. |
| 2 | +// The .NET Foundation licenses this file to you under the MIT license. |
| 3 | +// See the LICENSE file in the project root for more information. |
| 4 | + |
| 5 | +using Microsoft.ML.Core.Data; |
| 6 | +using Microsoft.ML.Data.DataLoadSave; |
| 7 | +using Microsoft.ML.Runtime; |
| 8 | +using Microsoft.ML.Runtime.Data; |
| 9 | +using Microsoft.ML.Runtime.Data.IO; |
| 10 | +using Microsoft.ML.Runtime.Model; |
| 11 | +using System.Collections.Generic; |
| 12 | + |
| 13 | +[assembly: LoadableClass(typeof(TransformWrapper), null, typeof(SignatureLoadModel), |
| 14 | + "Transform wrapper", TransformWrapper.LoaderSignature)] |
| 15 | + |
| 16 | +namespace Microsoft.ML.Runtime.Data |
| 17 | +{ |
// REVIEW: this class is public only because Wrappers.cs in the tests still relies on it.
// It needs to become internal.
/// <summary>
/// An <see cref="ITransformer"/> backed by an <see cref="IDataView"/> whose chain of
/// <see cref="IDataTransform"/> objects is re-applied to new input data.
/// </summary>
public sealed class TransformWrapper : ITransformer, ICanSaveModel
{
    public const string LoaderSignature = "TransformWrapper";
    private const string TransformDirTemplate = "Step_{0:000}";

    private readonly IHost _host;
    private readonly IDataView _xf;

    /// <summary>
    /// Wraps an existing data view (typically the tail of a transform chain).
    /// </summary>
    /// <param name="env">The host environment; must not be null.</param>
    /// <param name="xf">The data view to wrap; must not be null.</param>
    public TransformWrapper(IHostEnvironment env, IDataView xf)
    {
        Contracts.CheckValue(env, nameof(env));
        _host = env.Register(nameof(TransformWrapper));
        _host.CheckValue(xf, nameof(xf));
        _xf = xf;
    }

    /// <summary>
    /// Computes the output schema by applying the wrapped transforms to an empty
    /// data view that has the given input schema.
    /// </summary>
    /// <param name="inputSchema">The schema of the prospective input; must not be null.</param>
    /// <returns>The schema of the data view produced for that input.</returns>
    public ISchema GetOutputSchema(ISchema inputSchema)
    {
        _host.CheckValue(inputSchema, nameof(inputSchema));

        var dv = new EmptyDataView(_host, inputSchema);
        var output = ApplyTransformUtils.ApplyAllTransformsToData(_host, _xf, dv);
        return output.Schema;
    }

    /// <summary>
    /// Saves the wrapped chain: a binary loader for the schema of the chain's root,
    /// the number of transforms, and then each transform in its own sub-model directory.
    /// </summary>
    /// <param name="ctx">The model save context; must not be null.</param>
    public void Save(ModelSaveContext ctx)
    {
        // Validate the argument the same way the loading constructor does, instead of
        // failing on a null dereference.
        _host.CheckValue(ctx, nameof(ctx));
        ctx.CheckAtModel();
        ctx.SetVersionInfo(GetVersionInfo());

        var dataPipe = _xf;
        var transforms = new List<IDataTransform>();
        while (dataPipe is IDataTransform xf)
        {
            // REVIEW: a malicious user could construct a loop in the Source chain, that would
            // cause this method to iterate forever (and throw something when the list overflows). There's
            // no way to insulate from ALL malicious behavior.
            transforms.Add(xf);
            dataPipe = xf.Source;
            _host.AssertValue(dataPipe);
        }

        // The chain was walked from the tail towards the root; store it root-first so that
        // loading can re-apply the transforms in order.
        transforms.Reverse();

        ctx.SaveSubModel("Loader", c => BinaryLoader.SaveInstance(_host, c, dataPipe.Schema));

        ctx.Writer.Write(transforms.Count);
        for (int i = 0; i < transforms.Count; i++)
            ctx.SaveModel(transforms[i], GetTransformDirName(i));
    }

    private static VersionInfo GetVersionInfo()
    {
        return new VersionInfo(
            modelSignature: "XF WRPR",
            verWrittenCur: 0x00010001, // Initial
            verReadableCur: 0x00010001,
            verWeCanReadBack: 0x00010001,
            loaderSignature: LoaderSignature);
    }

    /// <summary>
    /// Builds the sub-model directory name for the transform at the given position.
    /// Shared by saving and loading so both always agree on the name.
    /// </summary>
    private static string GetTransformDirName(int index)
    {
        // The name is part of the persisted model layout, so format it independently
        // of the current thread culture.
        return string.Format(System.Globalization.CultureInfo.InvariantCulture, TransformDirTemplate, index);
    }

    // Factory for SignatureLoadModel.
    /// <summary>
    /// Loads a wrapper previously written by <see cref="Save"/>.
    /// </summary>
    /// <param name="env">The host environment; must not be null.</param>
    /// <param name="ctx">The model load context; must not be null.</param>
    public TransformWrapper(IHostEnvironment env, ModelLoadContext ctx)
    {
        Contracts.CheckValue(env, nameof(env));
        _host = env.Register(nameof(TransformWrapper));
        _host.CheckValue(ctx, nameof(ctx));

        ctx.CheckAtModel(GetVersionInfo());
        int n = ctx.Reader.ReadInt32();
        _host.CheckDecode(n >= 0);

        ctx.LoadModel<IDataLoader, SignatureLoadDataLoader>(env, out var loader, "Loader", new MultiFileSource(null));

        // Rebuild the chain on top of the loader, one transform per saved step.
        IDataView data = loader;
        for (int i = 0; i < n; i++)
        {
            ctx.LoadModel<IDataTransform, SignatureLoadDataTransform>(env, out var xf, GetTransformDirName(i), data);
            data = xf;
        }

        _xf = data;
    }

    /// <summary>
    /// Applies the wrapped transforms to <paramref name="input"/>.
    /// </summary>
    /// <param name="input">The data to transform; must not be null.</param>
    /// <returns>The data view returned by <c>ApplyTransformUtils.ApplyAllTransformsToData</c>.</returns>
    public IDataView Transform(IDataView input)
    {
        _host.CheckValue(input, nameof(input));
        return ApplyTransformUtils.ApplyAllTransformsToData(_host, _xf, input);
    }
}
| 109 | + |
/// <summary>
/// Base class for estimators that must be fitted to produce a <see cref="TransformWrapper"/>.
/// </summary>
internal abstract class TrainedWrapperEstimatorBase : IEstimator<TransformWrapper>
{
    private readonly IHost _host;

    /// <summary>
    /// Stores the host used for argument checks and for building schema stand-ins.
    /// </summary>
    /// <param name="host">The host; must not be null.</param>
    protected TrainedWrapperEstimatorBase(IHost host)
    {
        Contracts.CheckValue(host, nameof(host));
        _host = host;
    }

    /// <summary>
    /// Fits the estimator on <paramref name="input"/> and returns the resulting wrapper.
    /// </summary>
    public abstract TransformWrapper Fit(IDataView input);

    /// <summary>
    /// Derives the output schema shape by fitting on an empty data view built from
    /// <paramref name="inputSchema"/> and asking the fitted wrapper for its output schema.
    /// </summary>
    /// <param name="inputSchema">The shape of the prospective input; must not be null.</param>
    /// <returns>The shape created from the fitted wrapper's output schema.</returns>
    public SchemaShape GetOutputSchema(SchemaShape inputSchema)
    {
        _host.CheckValue(inputSchema, nameof(inputSchema));

        // A stand-in schema is used both as the schema of the empty training data
        // and as the input schema passed to the fitted wrapper.
        var standInSchema = new FakeSchema(_host, inputSchema);
        var emptyData = new EmptyDataView(_host, standInSchema);
        var outputSchema = Fit(emptyData).GetOutputSchema(standInSchema);
        return SchemaShape.Create(outputSchema);
    }
}
| 134 | + |
/// <summary>
/// Base class for estimators whose <see cref="TransformWrapper"/> is supplied up front
/// and needs no training.
/// </summary>
public abstract class TrivialWrapperEstimator : TrivialEstimator<TransformWrapper>
{
    /// <summary>
    /// Passes the host and the ready-made transformer to the base estimator.
    /// </summary>
    /// <param name="host">The host handed to the base class.</param>
    /// <param name="transformer">The wrapper handed to the base class.</param>
    protected TrivialWrapperEstimator(IHost host, TransformWrapper transformer)
        : base(host, transformer)
    {
    }

    /// <summary>
    /// Derives the output schema shape by asking the transformer for its output schema
    /// on a stand-in schema built from <paramref name="inputSchema"/>.
    /// </summary>
    /// <param name="inputSchema">The shape of the prospective input; must not be null.</param>
    /// <returns>The shape created from the transformer's output schema.</returns>
    public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
    {
        Host.CheckValue(inputSchema, nameof(inputSchema));

        var standInSchema = new FakeSchema(Host, inputSchema);
        var outputSchema = Transformer.GetOutputSchema(standInSchema);
        return SchemaShape.Create(outputSchema);
    }
}
| 152 | +} |
0 commit comments