3
3
// See the LICENSE file in the project root for more information.
4
4
5
5
using System ;
6
- using System . Buffers ;
7
6
using System . Collections . Generic ;
8
7
using System . Drawing ;
9
8
using System . IO ;
10
9
using System . Linq ;
10
+ using System . Runtime . InteropServices ;
11
+ using System . Security . Cryptography ;
11
12
using System . Text ;
12
13
using Microsoft . ML ;
13
14
using Microsoft . ML . CommandLine ;
@@ -71,10 +72,10 @@ internal sealed class Options : TransformInputBase
71
72
/// </summary>
72
73
public readonly string ImageFolder ;
73
74
/// <summary>
74
- /// The DataViewType for the image. It can be a VectorDataView of bytes or ImageDataView type.
75
- /// If no options are specified, it defaults to ImageDataView type.
75
+ /// Flag selecting the DataViewType of the loaded image: if true, the image is a VectorDataViewType of bytes; otherwise it is an ImageDataViewType.
76
+ /// If not specified, it defaults to false, i.e. ImageDataViewType.
76
77
/// </summary>
77
- public readonly DataViewType Type ;
78
+ public readonly bool Type ;
78
79
79
80
/// <summary>
80
81
/// The columns passed to this <see cref="ITransformer"/>.
@@ -91,7 +92,7 @@ internal ImageLoadingTransformer(IHostEnvironment env, string imageFolder = null
91
92
: base ( Contracts . CheckRef ( env , nameof ( env ) ) . Register ( nameof ( ImageLoadingTransformer ) ) , columns )
92
93
{
93
94
ImageFolder = imageFolder ;
94
- Type = new ImageDataViewType ( ) ;
95
+ Type = false ;
95
96
}
96
97
97
98
/// <summary>
@@ -101,14 +102,11 @@ internal ImageLoadingTransformer(IHostEnvironment env, string imageFolder = null
101
102
/// <param name="imageFolder">Folder where to look for images.</param>
102
103
/// <param name="type">Image type flag - if true, images are loaded as a VectorDataViewType of bytes; if false, as ImageDataViewType. Defaults to false (ImageDataViewType).</param>
103
104
/// <param name="columns">Names of input and output columns.</param>
104
- internal ImageLoadingTransformer ( IHostEnvironment env , string imageFolder = null , DataViewType type = null , params ( string outputColumnName , string inputColumnName ) [ ] columns )
105
+ internal ImageLoadingTransformer ( IHostEnvironment env , string imageFolder = null , bool type = false , params ( string outputColumnName , string inputColumnName ) [ ] columns )
105
106
: base ( Contracts . CheckRef ( env , nameof ( env ) ) . Register ( nameof ( ImageLoadingTransformer ) ) , columns )
106
107
{
107
108
ImageFolder = imageFolder ;
108
- if ( type == null )
109
- Type = new ImageDataViewType ( ) ;
110
- else
111
- Type = type ;
109
+ Type = type ;
112
110
}
113
111
114
112
// Factory method for SignatureDataTransform.
@@ -125,7 +123,7 @@ private static ImageLoadingTransformer Create(IHostEnvironment env, ModelLoadCon
125
123
env . CheckValue ( ctx , nameof ( ctx ) ) ;
126
124
127
125
ctx . CheckAtModel ( GetVersionInfo ( ) ) ;
128
- return new ImageLoadingTransformer ( env . Register ( nameof ( ImageLoadingTransformer ) ) , ctx ) ;
126
+ return new ImageLoadingTransformer ( env . Register ( nameof ( ImageLoadingTransformer ) ) , ctx ) ;
129
127
}
130
128
131
129
private ImageLoadingTransformer ( IHost host , ModelLoadContext ctx )
@@ -137,13 +135,13 @@ private ImageLoadingTransformer(IHost host, ModelLoadContext ctx)
137
135
138
136
ImageFolder = ctx . LoadStringOrNull ( ) ;
139
137
140
- if ( new VectorDataViewType ( NumberDataViewType . Byte ) . ToString ( ) . Equals ( ctx . LoadStringOrNull ( ) ) )
138
+ if ( ctx . LoadStringOrNull ( ) . Equals ( "True" ) )
141
139
{
142
- Type = new VectorDataViewType ( NumberDataViewType . Byte ) ;
140
+ Type = true ; // It is a VBuffer<byte> type
143
141
}
144
142
else
145
143
{
146
- Type = new ImageDataViewType ( ) ;
144
+ Type = false ; // It is a ImageDataViewType
147
145
}
148
146
149
147
}
@@ -195,9 +193,9 @@ private static VersionInfo GetVersionInfo()
195
193
private sealed class Mapper : OneToOneMapperBase
196
194
{
197
195
private readonly ImageLoadingTransformer _parent ;
198
- private readonly DataViewType _type ;
196
+ private readonly bool _type ;
199
197
200
- public Mapper ( ImageLoadingTransformer parent , DataViewSchema inputSchema , DataViewType type )
198
+ public Mapper ( ImageLoadingTransformer parent , DataViewSchema inputSchema , bool type )
201
199
: base ( parent . Host . Register ( nameof ( Mapper ) ) , parent , inputSchema )
202
200
{
203
201
_type = type ;
@@ -207,18 +205,18 @@ public Mapper(ImageLoadingTransformer parent, DataViewSchema inputSchema, DataVi
207
205
protected override Delegate MakeGetter ( DataViewRow input , int iinfo , Func < int , bool > activeOutput , out Action disposer )
208
206
{
209
207
disposer = null ;
210
- // Check for the type of Image, VBuffer<bytes> or ImageDataViewType and call the appropriate MakeGetter function
211
- if ( new VectorDataViewType ( NumberDataViewType . Byte ) . Equals ( _type ) )
208
+ // Check for the type of Image, if true load images as VBuffer<bytes> else load images as ImageDataViewType
209
+ if ( _type )
212
210
{
213
- return MakeGetterType ( input , iinfo , activeOutput , ( VectorDataViewType ) _type , out disposer ) ;
211
+ return MakeGetterVectorDataViewByteType ( input , iinfo , activeOutput , out disposer ) ;
214
212
}
215
213
else
216
214
{
217
- return MakeGetterType ( input , iinfo , activeOutput , ( ImageDataViewType ) _type , out disposer ) ;
215
+ return MakeGetterImageDataViewType ( input , iinfo , activeOutput , out disposer ) ;
218
216
}
219
217
}
220
218
221
- private Delegate MakeGetterType ( DataViewRow input , int iinfo , Func < int , bool > activeOutput , ImageDataViewType type , out Action disposer )
219
+ private Delegate MakeGetterImageDataViewType ( DataViewRow input , int iinfo , Func < int , bool > activeOutput , out Action disposer )
222
220
{
223
221
Contracts . AssertValue ( input ) ;
224
222
Contracts . Assert ( 0 <= iinfo && iinfo < _parent . ColumnPairs . Length ) ;
@@ -253,7 +251,7 @@ private Delegate MakeGetterType(DataViewRow input, int iinfo, Func<int, bool> ac
253
251
return del ;
254
252
}
255
253
256
- private Delegate MakeGetterType ( DataViewRow input , int iinfo , Func < int , bool > activeOutput , VectorDataViewType type , out Action disposer )
254
+ private Delegate MakeGetterVectorDataViewByteType ( DataViewRow input , int iinfo , Func < int , bool > activeOutput , out Action disposer )
257
255
{
258
256
Contracts . AssertValue ( input ) ;
259
257
Contracts . Assert ( 0 <= iinfo && iinfo < _parent . ColumnPairs . Length ) ;
@@ -283,6 +281,7 @@ private Delegate MakeGetterType(DataViewRow input, int iinfo, Func<int, bool> ac
283
281
public static int LoadDataIntoBuffer ( string path , ref VBuffer < byte > imgData )
284
282
{
285
283
int count = - 1 ;
284
+ int bytesread = - 1 ;
286
285
// bufferSize == 1 used to avoid unnecessary buffer in FileStream
287
286
using ( FileStream fs = new FileStream ( path , FileMode . Open , FileAccess . Read , FileShare . Read , bufferSize : 1 ) )
288
287
{
@@ -291,34 +290,68 @@ public static int LoadDataIntoBuffer(string path, ref VBuffer<byte> imgData)
291
290
throw new IOException ( $ "File { path } too big to open.") ;
292
291
else if ( fileLength == 0 )
293
292
{
293
+ Console . WriteLine ( "File length is zero even though the image is not empty" ) ;
294
294
byte [ ] imageBuffer ;
295
295
// Some file systems (e.g. procfs on Linux) return 0 for length even when there's content.
296
296
// Thus we need to assume 0 doesn't mean empty.
297
297
imageBuffer = File . ReadAllBytes ( path ) ;
298
298
count = imageBuffer . Length ;
299
- Console . WriteLine ( "File length is zero" ) ;
299
+ imgData = new VBuffer < byte > ( count , imageBuffer ) ;
300
+ return count ;
300
301
}
301
302
302
303
count = ( int ) fileLength ;
304
+ var editor = VBufferEditor . Create ( ref imgData , count ) ;
303
305
304
306
#if NETSTANDARD2_0
305
- byte [ ] buffer = null ;
306
- buffer = File . ReadAllBytes ( path ) ;
307
- imgData = new VBuffer < byte > ( buffer . Length , buffer ) ;
307
+ bytesread = ReadToEnd ( fs , editor . Values ) ;
308
+ Contracts . Assert ( count == bytesread ) ;
308
309
309
310
#else
310
- var editor = VBufferEditor . Create ( ref imgData , count ) ;
311
311
fs . Read ( editor . Values ) ;
312
- imgData = editor . Commit ( ) ;
312
+ bytesread = editor . Values . Length ;
313
+ Contracts . Assert ( count == bytesread ) ;
313
314
#endif
315
+ imgData = editor . Commit ( ) ;
314
316
return count ;
315
317
316
318
}
317
319
318
320
}
319
321
322
/// <summary>
/// Reads <paramref name="stream"/> from its current position until it reports end-of-stream,
/// copying the bytes, in order, into <paramref name="bufferspan"/>.
/// </summary>
/// <param name="stream">Stream to drain.</param>
/// <param name="bufferspan">Destination span; it must be large enough to hold everything the stream yields.</param>
/// <returns>The total number of bytes copied into <paramref name="bufferspan"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The stream yields more bytes than <paramref name="bufferspan"/> can hold.</exception>
public static int ReadToEnd(System.IO.Stream stream, Span<byte> bufferspan)
{
    if (stream == null)
        throw new ArgumentNullException(nameof(stream));

    // Data is staged through a small managed array so that only the array-based
    // Stream.Read overload is needed (the caller uses this method under NETSTANDARD2_0).
    const int chunkSize = 4096;
    byte[] readBuffer = new byte[chunkSize];
    int totalBytesRead = 0;
    int bytesRead;

    while ((bytesRead = stream.Read(readBuffer, 0, readBuffer.Length)) > 0)
    {
        // Fail with an explicit message rather than writing past the destination.
        if (bytesRead > bufferspan.Length - totalBytesRead)
            throw new ArgumentOutOfRangeException(nameof(bufferspan), "The stream contains more data than the destination buffer can hold.");

        // Safe, bounds-checked copy; no pinning or unsafe pointer arithmetic required.
        new ReadOnlySpan<byte>(readBuffer, 0, bytesRead).CopyTo(bufferspan.Slice(totalBytesRead));
        totalBytesRead += bytesRead;
    }

    return totalBytesRead;
}
344
+
345
/// <summary>
/// Maps this mapper's boolean image-type flag onto the matching <see cref="DataViewType"/>.
/// </summary>
/// <returns>
/// A new <see cref="VectorDataViewType"/> of bytes when the flag is true;
/// a new <see cref="ImageDataViewType"/> otherwise.
/// </returns>
public DataViewType GetDataViewType()
{
    DataViewType outputType;
    if (!_type)
    {
        outputType = new ImageDataViewType();
    }
    else
    {
        outputType = new VectorDataViewType(NumberDataViewType.Byte);
    }
    return outputType;
}
352
+
320
353
protected override DataViewSchema . DetachedColumn [ ] GetOutputColumnsCore ( )
321
- => _parent . ColumnPairs . Select ( x => new DataViewSchema . DetachedColumn ( x . outputColumnName , _type , null ) ) . ToArray ( ) ;
354
+ => _parent . ColumnPairs . Select ( x => new DataViewSchema . DetachedColumn ( x . outputColumnName , GetDataViewType ( ) , null ) ) . ToArray ( ) ;
322
355
}
323
356
}
324
357
@@ -371,18 +404,18 @@ internal ImageLoadingEstimator(IHostEnvironment env, string imageFolder, params
371
404
/// <param name="imageFolder">Folder where to look for images.</param>
372
405
/// <param name="type">Image type flag - if true, the output is a VectorDataViewType of bytes; if false, it is ImageDataViewType. Defaults to false (ImageDataViewType).</param>
373
406
/// <param name="columns">Names of input and output columns.</param>
374
- internal ImageLoadingEstimator ( IHostEnvironment env , string imageFolder , DataViewType type = null , params ( string outputColumnName , string inputColumnName ) [ ] columns )
407
+ internal ImageLoadingEstimator ( IHostEnvironment env , string imageFolder , bool type = false , params ( string outputColumnName , string inputColumnName ) [ ] columns )
375
408
: this ( env , new ImageLoadingTransformer ( env , imageFolder , type , columns ) , type )
376
409
{
377
410
}
378
411
379
- internal ImageLoadingEstimator ( IHostEnvironment env , ImageLoadingTransformer transformer , DataViewType type = null )
412
+ internal ImageLoadingEstimator ( IHostEnvironment env , ImageLoadingTransformer transformer , bool type = false )
380
413
: base ( Contracts . CheckRef ( env , nameof ( env ) ) . Register ( nameof ( ImageLoadingEstimator ) ) , transformer )
381
414
{
382
- if ( type == null )
415
+ if ( ! type )
383
416
_imageType = new ImageDataViewType ( ) ;
384
417
else
385
- _imageType = type ;
418
+ _imageType = new VectorDataViewType ( NumberDataViewType . Byte ) ;
386
419
}
387
420
388
421
/// <summary>
@@ -400,7 +433,10 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
400
433
if ( ! ( col . ItemType is TextDataViewType ) || col . Kind != SchemaShape . Column . VectorKind . Scalar )
401
434
throw Host . ExceptSchemaMismatch ( nameof ( inputSchema ) , "input" , inputColumnName , TextDataViewType . Instance . ToString ( ) , col . GetTypeString ( ) ) ;
402
435
403
- result [ outputColumnName ] = new SchemaShape . Column ( outputColumnName , SchemaShape . Column . VectorKind . Scalar , _imageType , false ) ;
436
+ if ( _imageType is ImageDataViewType )
437
+ result [ outputColumnName ] = new SchemaShape . Column ( outputColumnName , SchemaShape . Column . VectorKind . Scalar , _imageType , false ) ;
438
+ else
439
+ result [ outputColumnName ] = new SchemaShape . Column ( outputColumnName , SchemaShape . Column . VectorKind . Vector , NumberDataViewType . Byte , false ) ;
404
440
}
405
441
406
442
return new SchemaShape ( result . Values ) ;
0 commit comments