-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Add PartitionedFileLoader #61
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
a07091b
67a358c
0bc8a2b
ce3edce
eebf207
bcd4aad
748ffe7
4549388
781a45e
e54698a
885ff30
bbf8de8
9a2d641
225b7ee
4497c35
1e01903
10f47b4
90bedc4
c0467e6
674b5cb
5265c90
a040b51
fe6ca03
097086f
fe3229f
d3997cb
3c5d6d8
5edd446
c1a5897
7d09e32
d9905bb
ec92ecd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
col1, col2 | ||
0, 1 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
col1, col2 | ||
4, 5 | ||
6, 7 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
col1, col2 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
col1, col2 | ||
21, 22 | ||
23, 24 | ||
25, 26 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
col1, col2 | ||
21, 22 | ||
23, 24 | ||
25, 26 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
col1, col2 | ||
0, 1 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
col1, col2 | ||
4, 5 | ||
6, 7 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
col1 | ||
11 | ||
12 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
col1, col2 | ||
21, 22 | ||
23, 24 | ||
25, 26 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#@ TextLoader{ | ||
#@ header+ | ||
#@ sep=tab | ||
#@ col=L0:TX:0 | ||
#@ col=Year:TX:1 | ||
#@ col=Month:TX:2 | ||
#@ } | ||
L0 Year Month | ||
0 2017 01 | ||
4 2017 01 | ||
6 2017 01 | ||
21 2017 02 | ||
23 2017 02 | ||
25 2017 02 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
---- PartitionedFileLoader ---- | ||
3 columns: | ||
L0: Text | ||
Year: Text | ||
Month: Text |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#@ TextLoader{ | ||
#@ header+ | ||
#@ sep=tab | ||
#@ col=L0:I4:0 | ||
#@ col=Month:I4:1 | ||
#@ col=Path:TX:2 | ||
#@ } | ||
L0 Month Path | ||
1 1 2017\01\data1.csv | ||
5 1 2017\01\data2.csv | ||
7 1 2017\01\data2.csv | ||
0 1 2017\01\dataBadSchema.csv | ||
0 1 2017\01\dataBadSchema.csv | ||
22 2 2017\02\data1.csv | ||
24 2 2017\02\data1.csv | ||
26 2 2017\02\data1.csv |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
---- PartitionedFileLoader ---- | ||
3 columns: | ||
L0: I4 | ||
Month: I4 | ||
Path: Text |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#@ TextLoader{ | ||
#@ header+ | ||
#@ sep=tab | ||
#@ col=L0:TX:0 | ||
#@ col=Year:TX:1 | ||
#@ col=Month:TX:2 | ||
#@ } | ||
L0 Year Month | ||
0 2017 01 | ||
4 2017 01 | ||
6 2017 01 | ||
21 2017 02 | ||
23 2017 02 | ||
25 2017 02 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
---- PartitionedFileLoader ---- | ||
3 columns: | ||
L0: Text | ||
Year: Text | ||
Month: Text |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#@ TextLoader{ | ||
#@ header+ | ||
#@ sep=tab | ||
#@ col=L0:I4:0 | ||
#@ col=Month:I4:1 | ||
#@ col=Path:TX:2 | ||
#@ } | ||
L0 Month Path | ||
1 1 2017\01\data1.csv | ||
5 1 2017\01\data2.csv | ||
7 1 2017\01\data2.csv | ||
0 1 2017\01\dataBadSchema.csv | ||
0 1 2017\01\dataBadSchema.csv | ||
22 2 2017\02\data1.csv | ||
24 2 2017\02\data1.csv | ||
26 2 2017\02\data1.csv |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
---- PartitionedFileLoader ---- | ||
3 columns: | ||
L0: I4 | ||
Month: I4 | ||
Path: Text |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,11 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Threading; | ||
|
||
namespace Microsoft.ML.Runtime.Internal.Utilities | ||
|
@@ -67,13 +69,13 @@ public static string FindExistentFileOrNull(string fileName, string folderPrefix | |
// 1. Search in customSearchDir. | ||
if (!string.IsNullOrWhiteSpace(customSearchDir) | ||
&& TryFindFile(fileName, folderPrefix, customSearchDir, out candidate)) | ||
return candidate; | ||
return candidate; | ||
|
||
// 2. Search in the path specified by the environment variable. | ||
var envDir = Environment.GetEnvironmentVariable(CustomSearchDirEnvVariable); | ||
if (!string.IsNullOrWhiteSpace(envDir) | ||
&& TryFindFile(fileName, folderPrefix, envDir, out candidate)) | ||
return candidate; | ||
return candidate; | ||
|
||
// 3. Search in the path specified by the assemblyForBasePath. | ||
if (assemblyForBasePath != null) | ||
|
@@ -139,5 +141,63 @@ public static string CreateFolderIfNotExists(string folder) | |
|
||
return null; | ||
} | ||
|
||
/// <summary> | ||
/// Make a full path relative to a base path. | ||
/// </summary> | ||
/// <param name="basepath">The base path, assumed to be a directory.</param> | ||
/// <param name="path">The full path.</param> | ||
/// <returns>The relative path.</returns> | ||
/// <exception cref="ArgumentException">If the paths are not relative.</exception> | ||
public static string MakePathRelative(string basepath, string path) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
We might prefer to have at least two distinct usages of some facility before it is "promoted" to utilities, for two practical reasons:
What do we think about keeping this internal so that it can be used from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have no objection to this, however I can't just make the methods internal since the usage is in a different assembly. So I had to move it to a different file to be internal. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, right, that's what I meant, thanks Tyler -- maybe made with different class name to avoid name collisions in that namespace though. In reply to: 190063880 [](ancestors = 190063880) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. resolved. |
||
{ | ||
Contracts.AssertNonEmpty(basepath); | ||
Contracts.AssertNonEmpty(path); | ||
|
||
Uri baseUri = new Uri(basepath); | ||
Uri uri = new Uri(path); | ||
|
||
if (baseUri.Scheme != uri.Scheme) | ||
{ | ||
throw new ArgumentException("Paths cannot be made relative as they are of different schemas."); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
schemE #Closed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
can we use host.except instead of throw? Might need to pass env or host. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As a Util class, it's my opinion it should be unaware of host. It would then be incumbent on the caller (who is aware of host) to pass the exception on to host.except. Is there a strong reason to avoid throwing that I'm not aware of? #Closed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi Tyler! While we generally prefer that exceptions be thrown through host so that the context information of the call is preserved, we have not done this with the utils classes for one reason or another. So I think it's fine that you haven't. However, it is still important that the exception be marked. E.g.: use Note that we still use In reply to: 187710640 [](ancestors = 187710640) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that if you're using Contracts.CheckParam(baseUri.Scheme == uri.Scheme, nameof(basepath), "Paths cannot be made relative as they are of different schemes."); Similar for the below, maybe. In reply to: 189994300 [](ancestors = 189994300,187710640) |
||
} | ||
|
||
string relativePath; | ||
try | ||
{ | ||
if (!baseUri.AbsoluteUri.EndsWith("/")) | ||
{ | ||
baseUri = new Uri(baseUri.AbsoluteUri + "/"); | ||
} | ||
|
||
relativePath = baseUri.MakeRelativeUri(uri).ToString(); | ||
} | ||
catch (ArgumentNullException e) | ||
{ | ||
throw new ArgumentException("Paths could not be made relative.", e); | ||
} | ||
catch (InvalidOperationException e) | ||
{ | ||
throw new ArgumentException("Paths could not be made relative.", e); | ||
} | ||
|
||
if (uri.Scheme.Equals("file", StringComparison.InvariantCultureIgnoreCase)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It is generally best practice to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the tip. |
||
{ | ||
relativePath = relativePath.Replace(Path.AltDirectorySeparatorChar, Path.DirectorySeparatorChar); | ||
} | ||
|
||
return relativePath; | ||
} | ||
|
||
/// <summary> | ||
/// Split a path string into an enumerable list of the directories. | ||
/// </summary> | ||
/// <param name="path">The path string to split.</param> | ||
/// <returns>An enumerable list of all non-empty directories.</returns> | ||
public static IEnumerable<string> SplitDirectories(string path) | ||
{ | ||
var cleanPath = path.Replace(Path.AltDirectorySeparatorChar, Path.DirectorySeparatorChar); | ||
return cleanPath.Split(new char[] { Path.DirectorySeparatorChar }, StringSplitOptions.RemoveEmptyEntries); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
this creates garbage per call. Can we create a const/static var to recycle this char array? #Closed |
||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmmm. I know you didn't write this code, but since the if condition is on multiple lines it really ought to be bracketed... if you have time could you fix it? The below if condition has the same problem I'm afraid. #Closed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No worries. I'm always happy to clean up stuff.