@@ -9,15 +9,16 @@ namespace Microsoft.ML.Auto
9
9
{
10
10
internal static class ColumnInferenceApi
11
11
{
12
- public static ColumnInferenceResult InferColumns ( MLContext context , string path , string label ,
13
- bool hasHeader , char ? separatorChar , bool ? allowQuotedStrings , bool ? supportSparse , bool trimWhitespace )
12
+ public static ColumnInferenceResult InferColumns ( MLContext context , string path , string label ,
13
+ bool hasHeader , char ? separatorChar , bool ? allowQuotedStrings , bool ? supportSparse , bool trimWhitespace , bool groupColumns )
14
14
{
15
15
var sample = TextFileSample . CreateFromFullFile ( path ) ;
16
16
var splitInference = InferSplit ( sample , separatorChar , allowQuotedStrings , supportSparse ) ;
17
17
var typeInference = InferColumnTypes ( context , sample , splitInference , hasHeader ) ;
18
+ var loaderColumns = ColumnTypeInference . GenerateLoaderColumns ( typeInference . Columns ) ;
18
19
var typedLoaderArgs = new TextLoader . Arguments
19
20
{
20
- Column = ColumnTypeInference . GenerateLoaderColumns ( typeInference . Columns ) ,
21
+ Column = loaderColumns ,
21
22
Separator = splitInference . Separator ,
22
23
AllowSparse = splitInference . AllowSparse ,
23
24
AllowQuoting = splitInference . AllowQuote ,
@@ -29,12 +30,24 @@ public static ColumnInferenceResult InferColumns(MLContext context, string path,
29
30
30
31
var purposeInferenceResult = PurposeInference . InferPurposes ( context , dataView , label ) ;
31
32
33
+ ( TextLoader . Column , ColumnPurpose Purpose ) [ ] inferredColumns = null ;
32
34
// infer column grouping and generate column names
33
- var groupingResult = ColumnGroupingInference . InferGroupingAndNames ( context , hasHeader ,
34
- typeInference . Columns , purposeInferenceResult ) ;
35
+ if ( groupColumns )
36
+ {
37
+ var groupingResult = ColumnGroupingInference . InferGroupingAndNames ( context , hasHeader ,
38
+ typeInference . Columns , purposeInferenceResult ) ;
35
39
36
- // build result objects & return
37
- var inferredColumns = groupingResult . Select ( c => ( c . GenerateTextLoaderColumn ( ) , c . Purpose ) ) . ToArray ( ) ;
40
+ // build result objects & return
41
+ inferredColumns = groupingResult . Select ( c => ( c . GenerateTextLoaderColumn ( ) , c . Purpose ) ) . ToArray ( ) ;
42
+ }
43
+ else
44
+ {
45
+ inferredColumns = new ( TextLoader . Column , ColumnPurpose Purpose ) [ loaderColumns . Length ] ;
46
+ for ( int i = 0 ; i < loaderColumns . Length ; i ++ )
47
+ {
48
+ inferredColumns [ i ] = ( loaderColumns [ i ] , purposeInferenceResult [ i ] . Purpose ) ;
49
+ }
50
+ }
38
51
return new ColumnInferenceResult ( inferredColumns , splitInference . AllowQuote , splitInference . AllowSparse , splitInference . Separator , hasHeader , trimWhitespace ) ;
39
52
}
40
53
@@ -44,15 +57,15 @@ private static TextFileContents.ColumnSplitResult InferSplit(TextFileSample samp
44
57
var splitInference = TextFileContents . TrySplitColumns ( sample , separatorCandidates ) ;
45
58
46
59
// respect passed-in overrides
47
- if ( allowQuotedStrings != null )
60
+ if ( allowQuotedStrings != null )
48
61
{
49
62
splitInference . AllowQuote = allowQuotedStrings . Value ;
50
63
}
51
- if ( supportSparse != null )
64
+ if ( supportSparse != null )
52
65
{
53
66
splitInference . AllowSparse = supportSparse . Value ;
54
67
}
55
-
68
+
56
69
if ( ! splitInference . IsSuccess )
57
70
{
58
71
throw new InferenceException ( InferenceType . ColumnSplit , "Unable to split the file provided into multiple, consistent columns." ) ;
0 commit comments