Skip to content

Commit 3ad26b4

Browse files
committed
Added buffer reuse while reading the image on netstandard2.0. Addressed Eric's comments. Changed ImageLoadingTransformer to take a bool instead of a DataViewType to make it more user-friendly (type = true means the image is loaded as a VBuffer<byte>; type = false means it is loaded as an ImageDataViewType).
1 parent c67dd08 commit 3ad26b4

File tree

8 files changed

+83
-51
lines changed

8 files changed

+83
-51
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningTrainTestSplit.cs

+3-2
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,12 @@ public static void Example()
5656

5757
IDataView trainDataset = trainTestData.TrainSet;
5858
IDataView testDataset = trainTestData.TestSet;
59-
var validationSet = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, new VectorDataViewType(NumberDataViewType.Byte), "ImagePath")
59+
60+
var validationSet = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, true, "ImagePath") // true indicates we want the image as a VBuffer<byte>
6061
.Fit(testDataset)
6162
.Transform(testDataset);
6263

63-
var pipeline = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, new VectorDataViewType(NumberDataViewType.Byte), "ImagePath")
64+
var pipeline = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, true, "ImagePath") // true indicates we want the image as a VBuffer<byte>
6465
.Append(mlContext.Model.ImageClassification(
6566
"Image", "Label",
6667
// Just by changing/selecting InceptionV3 here instead of

pkg/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.nupkgproj

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<Project Sdk="Microsoft.NET.Sdk" DefaultTargets="Pack">
22

33
<PropertyGroup>
4-
<TargetFramework>netstandard2.0</TargetFramework>
4+
<TargetFrameworks>netstandard2.0;netstandard2.1;netcoreapp2.1</TargetFrameworks>
55
<PackageDescription>ML.NET component for Image support</PackageDescription>
66
</PropertyGroup>
77

src/Microsoft.ML.Core/Data/IEstimator.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ internal Column(string name, VectorKind vecKind, DataViewType itemType, bool isK
6464
Contracts.CheckNonEmpty(name, nameof(name));
6565
Contracts.CheckValueOrNull(annotations);
6666
Contracts.CheckParam(!(itemType is KeyDataViewType), nameof(itemType), "Item type cannot be a key");
67-
//Contracts.CheckParam(!(itemType is VectorDataViewType), nameof(itemType), "Item type cannot be a vector");
67+
Contracts.CheckParam(!(itemType is VectorDataViewType), nameof(itemType), "Item type cannot be a vector");
6868
Contracts.CheckParam(!isKey || KeyDataViewType.IsValidDataType(itemType.RawType), nameof(itemType), "The item type must be valid for a key");
6969

7070
Name = name;

src/Microsoft.ML.Dnn/ImageClassificationTransform.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1208,7 +1208,7 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
12081208
if (!inputSchema.TryFindColumn(input, out var col))
12091209
throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", input);
12101210
var expectedType = _inputTypes[i];
1211-
if (!col.ItemType.Equals(expectedType))
1211+
if (!col.ItemType.Equals(expectedType.GetItemType()))
12121212
throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, expectedType.ToString(), col.ItemType.ToString());
12131213
}
12141214
for (var i = 0; i < _options.OutputColumns.Length; i++)

src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -107,14 +107,14 @@ internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog,
107107
/// <param name="inputColumnName">Name of the column with paths to the images to load.
108108
/// This estimator operates over text data.</param>
109109
/// <param name="imageFolder">Folder where to look for images.</param>
110-
/// <param name="type">Image type - VectorDataView type or ImageDataViewType. Defaults to ImageDataViewType if not specified or null.</param>
110+
/// <param name="type">Image type flag. If true, the image is loaded as a VectorDataViewType of bytes; if false, it is loaded as an ImageDataViewType.</param>
111111
/// <example>
112112
/// <format type="text/markdown">
113113
/// <![CDATA[
114114
/// [!code-csharp[LoadImages](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs)]
115115
/// ]]></format>
116116
/// </example>
117-
public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string outputColumnName, string imageFolder, DataViewType type, string inputColumnName = null)
117+
public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string outputColumnName, string imageFolder, bool type, string inputColumnName = null)
118118
=> new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, type, new[] { (outputColumnName, inputColumnName ?? outputColumnName) });
119119

120120
/// <summary>
@@ -129,7 +129,7 @@ public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, s
129129
/// </remarks>
130130
/// <param name="catalog">The transform's catalog.</param>
131131
/// <param name="imageFolder">Folder where to look for images.</param>
132-
/// <param name="type">Image type - VectorDataView type or ImageDataViewType. Defaults to ImageDataViewType if not specified or null.</param>
132+
/// <param name="type">Image type flag. If true, the image is loaded as a VectorDataViewType of bytes; if false, it is loaded as an ImageDataViewType.</param>
133133
/// <param name="columns">Specifies the names of the input columns for the transformation, and their respective output column names.</param>
134134
/// <example>
135135
/// <format type="text/markdown">
@@ -138,7 +138,7 @@ public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, s
138138
/// ]]></format>
139139
/// </example>
140140
[BestFriend]
141-
internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, DataViewType type, params InputOutputColumnPair[] columns)
141+
internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, bool type, params InputOutputColumnPair[] columns)
142142
{
143143
var env = CatalogUtils.GetEnvironment(catalog);
144144
env.CheckValue(columns, nameof(columns));

src/Microsoft.ML.ImageAnalytics/ImageLoader.cs

+70-34
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6-
using System.Buffers;
76
using System.Collections.Generic;
87
using System.Drawing;
98
using System.IO;
109
using System.Linq;
10+
using System.Runtime.InteropServices;
11+
using System.Security.Cryptography;
1112
using System.Text;
1213
using Microsoft.ML;
1314
using Microsoft.ML.CommandLine;
@@ -71,10 +72,10 @@ internal sealed class Options : TransformInputBase
7172
/// </summary>
7273
public readonly string ImageFolder;
7374
/// <summary>
74-
/// The DataViewType for the image. It can be a VectorDataView of bytes or ImageDataView type.
75-
/// If no options are specified, it defaults to ImageDataView type.
75+
/// Flag selecting the DataViewType of the loaded image. If true, the image is a VectorDataViewType of bytes; otherwise it is an ImageDataViewType.
76+
/// If no options are specified, it defaults to false for ImageDataView type.
7677
/// </summary>
77-
public readonly DataViewType Type;
78+
public readonly bool Type;
7879

7980
/// <summary>
8081
/// The columns passed to this <see cref="ITransformer"/>.
@@ -91,7 +92,7 @@ internal ImageLoadingTransformer(IHostEnvironment env, string imageFolder = null
9192
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ImageLoadingTransformer)), columns)
9293
{
9394
ImageFolder = imageFolder;
94-
Type = new ImageDataViewType();
95+
Type = false;
9596
}
9697

9798
/// <summary>
@@ -101,14 +102,11 @@ internal ImageLoadingTransformer(IHostEnvironment env, string imageFolder = null
101102
/// <param name="imageFolder">Folder where to look for images.</param>
102103
/// <param name="type">Image type flag. If true, the image is loaded as a VectorDataViewType of bytes; otherwise as an ImageDataViewType. Defaults to false (ImageDataViewType) if not specified.</param>
103104
/// <param name="columns">Names of input and output columns.</param>
104-
internal ImageLoadingTransformer(IHostEnvironment env, string imageFolder = null, DataViewType type = null, params (string outputColumnName, string inputColumnName)[] columns)
105+
internal ImageLoadingTransformer(IHostEnvironment env, string imageFolder = null, bool type = false, params (string outputColumnName, string inputColumnName)[] columns)
105106
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ImageLoadingTransformer)), columns)
106107
{
107108
ImageFolder = imageFolder;
108-
if (type == null)
109-
Type = new ImageDataViewType();
110-
else
111-
Type = type;
109+
Type = type;
112110
}
113111

114112
// Factory method for SignatureDataTransform.
@@ -125,7 +123,7 @@ private static ImageLoadingTransformer Create(IHostEnvironment env, ModelLoadCon
125123
env.CheckValue(ctx, nameof(ctx));
126124

127125
ctx.CheckAtModel(GetVersionInfo());
128-
return new ImageLoadingTransformer(env.Register(nameof(ImageLoadingTransformer)), ctx );
126+
return new ImageLoadingTransformer(env.Register(nameof(ImageLoadingTransformer)), ctx);
129127
}
130128

131129
private ImageLoadingTransformer(IHost host, ModelLoadContext ctx)
@@ -137,13 +135,13 @@ private ImageLoadingTransformer(IHost host, ModelLoadContext ctx)
137135

138136
ImageFolder = ctx.LoadStringOrNull();
139137

140-
if(new VectorDataViewType(NumberDataViewType.Byte).ToString().Equals(ctx.LoadStringOrNull()))
138+
if (ctx.LoadStringOrNull().Equals("True"))
141139
{
142-
Type = new VectorDataViewType(NumberDataViewType.Byte);
140+
Type = true; // It is a VBuffer<byte> type
143141
}
144142
else
145143
{
146-
Type = new ImageDataViewType();
144+
Type = false; // It is a ImageDataViewType
147145
}
148146

149147
}
@@ -195,9 +193,9 @@ private static VersionInfo GetVersionInfo()
195193
private sealed class Mapper : OneToOneMapperBase
196194
{
197195
private readonly ImageLoadingTransformer _parent;
198-
private readonly DataViewType _type;
196+
private readonly bool _type;
199197

200-
public Mapper(ImageLoadingTransformer parent, DataViewSchema inputSchema, DataViewType type)
198+
public Mapper(ImageLoadingTransformer parent, DataViewSchema inputSchema, bool type)
201199
: base(parent.Host.Register(nameof(Mapper)), parent, inputSchema)
202200
{
203201
_type = type;
@@ -207,18 +205,18 @@ public Mapper(ImageLoadingTransformer parent, DataViewSchema inputSchema, DataVi
207205
protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func<int, bool> activeOutput, out Action disposer)
208206
{
209207
disposer = null;
210-
// Check for the type of Image, VBuffer<bytes> or ImageDataViewType and call the appropriate MakeGetter function
211-
if (new VectorDataViewType(NumberDataViewType.Byte).Equals(_type))
208+
// Check for the type of Image, if true load images as VBuffer<bytes> else load images as ImageDataViewType
209+
if (_type)
212210
{
213-
return MakeGetterType(input, iinfo, activeOutput, (VectorDataViewType)_type, out disposer);
211+
return MakeGetterVectorDataViewByteType(input, iinfo, activeOutput, out disposer);
214212
}
215213
else
216214
{
217-
return MakeGetterType(input, iinfo, activeOutput, (ImageDataViewType)_type, out disposer);
215+
return MakeGetterImageDataViewType(input, iinfo, activeOutput, out disposer);
218216
}
219217
}
220218

221-
private Delegate MakeGetterType(DataViewRow input, int iinfo, Func<int, bool> activeOutput, ImageDataViewType type, out Action disposer)
219+
private Delegate MakeGetterImageDataViewType(DataViewRow input, int iinfo, Func<int, bool> activeOutput, out Action disposer)
222220
{
223221
Contracts.AssertValue(input);
224222
Contracts.Assert(0 <= iinfo && iinfo < _parent.ColumnPairs.Length);
@@ -253,7 +251,7 @@ private Delegate MakeGetterType(DataViewRow input, int iinfo, Func<int, bool> ac
253251
return del;
254252
}
255253

256-
private Delegate MakeGetterType(DataViewRow input, int iinfo, Func<int, bool> activeOutput, VectorDataViewType type, out Action disposer)
254+
private Delegate MakeGetterVectorDataViewByteType(DataViewRow input, int iinfo, Func<int, bool> activeOutput, out Action disposer)
257255
{
258256
Contracts.AssertValue(input);
259257
Contracts.Assert(0 <= iinfo && iinfo < _parent.ColumnPairs.Length);
@@ -283,6 +281,7 @@ private Delegate MakeGetterType(DataViewRow input, int iinfo, Func<int, bool> ac
283281
public static int LoadDataIntoBuffer(string path, ref VBuffer<byte> imgData)
284282
{
285283
int count = -1;
284+
int bytesread = -1;
286285
// bufferSize == 1 used to avoid unnecessary buffer in FileStream
287286
using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1))
288287
{
@@ -291,34 +290,68 @@ public static int LoadDataIntoBuffer(string path, ref VBuffer<byte> imgData)
291290
throw new IOException($"File {path} too big to open.");
292291
else if (fileLength == 0)
293292
{
293+
Console.WriteLine("File length is zero even though the image is not empty");
294294
byte[] imageBuffer;
295295
// Some file systems (e.g. procfs on Linux) return 0 for length even when there's content.
296296
// Thus we need to assume 0 doesn't mean empty.
297297
imageBuffer = File.ReadAllBytes(path);
298298
count = imageBuffer.Length;
299-
Console.WriteLine("File length is zero");
299+
imgData = new VBuffer<byte>(count,imageBuffer);
300+
return count;
300301
}
301302

302303
count = (int)fileLength;
304+
var editor = VBufferEditor.Create(ref imgData, count);
303305

304306
#if NETSTANDARD2_0
305-
byte[] buffer = null;
306-
buffer = File.ReadAllBytes(path);
307-
imgData = new VBuffer<byte>(buffer.Length, buffer);
307+
bytesread = ReadToEnd(fs, editor.Values);
308+
Contracts.Assert(count == bytesread);
308309

309310
#else
310-
var editor = VBufferEditor.Create(ref imgData, count);
311311
fs.Read(editor.Values);
312-
imgData = editor.Commit();
312+
bytesread = editor.Values.Length;
313+
Contracts.Assert(count == bytesread);
313314
#endif
315+
imgData = editor.Commit();
314316
return count;
315317

316318
}
317319

318320
}
319321

322+
/// <summary>
/// Reads <paramref name="stream"/> in fixed-size chunks until it reports end-of-stream,
/// copying every byte read into <paramref name="bufferspan"/> in order.
/// </summary>
/// <param name="stream">The stream to drain; it is read from its current position.</param>
/// <param name="bufferspan">Destination for the bytes read. It must be large enough to hold everything the stream yields.</param>
/// <returns>The total number of bytes copied into <paramref name="bufferspan"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The stream produced more bytes than <paramref name="bufferspan"/> can hold.
/// </exception>
public static int ReadToEnd(System.IO.Stream stream, Span<byte> bufferspan)
{
    // Size of the intermediate managed buffer used for each Stream.Read call.
    const int chunkSize = 4096;
    byte[] readBuffer = new byte[chunkSize];
    int totalBytesRead = 0;
    int bytesRead;

    // Stream.Read returns 0 only at end-of-stream; any positive count is a (possibly partial) chunk.
    while ((bytesRead = stream.Read(readBuffer, 0, readBuffer.Length)) > 0)
    {
        // Same failure mode as the previous Buffer.MemoryCopy call: refuse to write past the destination.
        if (bytesRead > bufferspan.Length - totalBytesRead)
            throw new ArgumentOutOfRangeException(nameof(bufferspan), "Destination span is too small for the stream contents.");

        // Safe span-to-span copy; no pinning or unsafe context is needed.
        new Span<byte>(readBuffer, 0, bytesRead).CopyTo(bufferspan.Slice(totalBytesRead));
        totalBytesRead += bytesRead;
    }

    return totalBytesRead;
}
344+
345+
/// <summary>
/// Maps the mapper's boolean image-type flag to the <see cref="DataViewType"/> of the output column:
/// a vector of bytes when the flag is true, an image type otherwise.
/// </summary>
/// <returns>A new <see cref="VectorDataViewType"/> over bytes, or a new <see cref="ImageDataViewType"/>.</returns>
public DataViewType GetDataViewType()
    => _type
        ? (DataViewType)new VectorDataViewType(NumberDataViewType.Byte)
        : new ImageDataViewType();
352+
320353
protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
321-
=> _parent.ColumnPairs.Select(x => new DataViewSchema.DetachedColumn(x.outputColumnName, _type, null)).ToArray();
354+
=> _parent.ColumnPairs.Select(x => new DataViewSchema.DetachedColumn(x.outputColumnName, GetDataViewType(), null)).ToArray();
322355
}
323356
}
324357

@@ -371,18 +404,18 @@ internal ImageLoadingEstimator(IHostEnvironment env, string imageFolder, params
371404
/// <param name="imageFolder">Folder where to look for images.</param>
372405
/// <param name="type">Image type flag. If true, the image is loaded as a VectorDataViewType of bytes; otherwise as an ImageDataViewType. Defaults to false (ImageDataViewType) if not specified.</param>
373406
/// <param name="columns">Names of input and output columns.</param>
374-
internal ImageLoadingEstimator(IHostEnvironment env, string imageFolder, DataViewType type = null, params (string outputColumnName, string inputColumnName)[] columns)
407+
internal ImageLoadingEstimator(IHostEnvironment env, string imageFolder, bool type = false, params (string outputColumnName, string inputColumnName)[] columns)
375408
: this(env, new ImageLoadingTransformer(env, imageFolder, type, columns), type)
376409
{
377410
}
378411

379-
internal ImageLoadingEstimator(IHostEnvironment env, ImageLoadingTransformer transformer, DataViewType type = null)
412+
internal ImageLoadingEstimator(IHostEnvironment env, ImageLoadingTransformer transformer, bool type = false)
380413
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ImageLoadingEstimator)), transformer)
381414
{
382-
if (type == null)
415+
if (!type)
383416
_imageType = new ImageDataViewType();
384417
else
385-
_imageType = type;
418+
_imageType = new VectorDataViewType(NumberDataViewType.Byte);
386419
}
387420

388421
/// <summary>
@@ -400,7 +433,10 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
400433
if (!(col.ItemType is TextDataViewType) || col.Kind != SchemaShape.Column.VectorKind.Scalar)
401434
throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", inputColumnName, TextDataViewType.Instance.ToString(), col.GetTypeString());
402435

403-
result[outputColumnName] = new SchemaShape.Column(outputColumnName, SchemaShape.Column.VectorKind.Scalar, _imageType, false);
436+
if (_imageType is ImageDataViewType)
437+
result[outputColumnName] = new SchemaShape.Column(outputColumnName, SchemaShape.Column.VectorKind.Scalar, _imageType, false);
438+
else
439+
result[outputColumnName] = new SchemaShape.Column(outputColumnName, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Byte, false);
404440
}
405441

406442
return new SchemaShape(result.Values);

src/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.csproj

+1-6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
<PropertyGroup>
44
<TargetFrameworks>netstandard2.0;netstandard2.1;netcoreapp2.1</TargetFrameworks>
55
<IncludeInPackage>Microsoft.ML.ImageAnalytics</IncludeInPackage>
6+
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
67
</PropertyGroup>
78

89
<ItemGroup>
@@ -13,11 +14,5 @@
1314
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
1415
<ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
1516
</ItemGroup>
16-
17-
<ItemGroup>
18-
<Reference Include="System.Buffers">
19-
<HintPath>..\..\..\..\..\..\Program Files\dotnet\packs\Microsoft.NETCore.App.Ref\3.0.0-preview8-28405-07\ref\netcoreapp3.0\System.Buffers.dll</HintPath>
20-
</Reference>
21-
</ItemGroup>
2217

2318
</Project>

0 commit comments

Comments
 (0)