-
Notifications
You must be signed in to change notification settings - Fork 1.9k
IndexOutOfRange Exception in KeyToVector transformer #2681
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
805d579
fcdf059
058eaac
055abae
acec961
34c2f1a
540b139
f0f4e6b
3b1fc7a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1273,7 +1273,7 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) | |
metadata.Add(slotMeta); | ||
if (colInfo.InvertHash != 0) | ||
metadata.Add(new SchemaShape.Column(AnnotationUtils.Kinds.KeyValues, SchemaShape.Column.VectorKind.Vector, TextDataViewType.Instance, false)); | ||
result[colInfo.Name] = new SchemaShape.Column(colInfo.Name, col.ItemType is VectorType ? SchemaShape.Column.VectorKind.Vector : SchemaShape.Column.VectorKind.Scalar, NumberDataViewType.UInt32, true, new SchemaShape(metadata)); | ||
result[colInfo.Name] = new SchemaShape.Column(colInfo.Name, col.Kind, NumberDataViewType.UInt32, true, new SchemaShape(metadata)); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does it mean the input shape must be scalar? If yes, we need to throw when encountering a vector. #Resolved There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No. It means we accept vectors, scalars, and variable-length vectors. In reply to: 263166153 [](ancestors = 263166153) |
||
return new SchemaShape(result.Values); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -559,7 +559,7 @@ private ValueGetter<VBuffer<float>> MakeGetterInd(DataViewRow input, int iinfo) | |
int lenDst = checked(size * lenSrc); | ||
var values = src.GetValues(); | ||
int cntSrc = values.Length; | ||
var editor = VBufferEditor.Create(ref dst, lenDst, cntSrc); | ||
var editor = VBufferEditor.Create(ref dst, lenDst, valuesCount: cntSrc, requireIndicesOnDense: true); | ||
|
||
int count = 0; | ||
if (src.IsDense) | ||
|
@@ -793,14 +793,16 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) | |
|
||
var metadata = new List<SchemaShape.Column>(); | ||
if (col.Annotations.TryFindColumn(AnnotationUtils.Kinds.KeyValues, out var keyMeta)) | ||
if (col.Kind != SchemaShape.Column.VectorKind.VariableVector && keyMeta.ItemType is TextDataViewType) | ||
if (((colInfo.Bag && col.IsKey) || col.Kind != SchemaShape.Column.VectorKind.VariableVector) && keyMeta.ItemType is TextDataViewType) | ||
metadata.Add(new SchemaShape.Column(AnnotationUtils.Kinds.SlotNames, SchemaShape.Column.VectorKind.Vector, keyMeta.ItemType, false)); | ||
if (!colInfo.Bag && (col.Kind == SchemaShape.Column.VectorKind.Scalar || col.Kind == SchemaShape.Column.VectorKind.Vector)) | ||
metadata.Add(new SchemaShape.Column(AnnotationUtils.Kinds.CategoricalSlotRanges, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Int32, false)); | ||
if (!colInfo.Bag || (col.Kind == SchemaShape.Column.VectorKind.Scalar)) | ||
metadata.Add(new SchemaShape.Column(AnnotationUtils.Kinds.IsNormalized, SchemaShape.Column.VectorKind.Scalar, BooleanDataViewType.Instance, false)); | ||
|
||
result[colInfo.Name] = new SchemaShape.Column(colInfo.Name, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false, new SchemaShape(metadata)); | ||
result[colInfo.Name] = new SchemaShape.Column(colInfo.Name, | ||
col.Kind == SchemaShape.Column.VectorKind.VariableVector && !colInfo.Bag ? SchemaShape.Column.VectorKind.VariableVector : SchemaShape.Column.VectorKind.Vector, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Um... yeah. If you wanted to explode out this condition a bit with a bunch of clarifying comments, I wouldn't object to this becoming more than a one-line statement. I found this slightly hard to unpack. (Correct, I think, just hard to unpack.) |
||
NumberDataViewType.Single, false, new SchemaShape(metadata)); | ||
} | ||
|
||
return new SchemaShape(result.Values); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -466,7 +466,9 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) | |
metadata.Add(new SchemaShape.Column(AnnotationUtils.Kinds.SlotNames, SchemaShape.Column.VectorKind.Vector, keyMeta.ItemType, false)); | ||
if (col.Kind == SchemaShape.Column.VectorKind.Scalar) | ||
metadata.Add(new SchemaShape.Column(AnnotationUtils.Kinds.IsNormalized, SchemaShape.Column.VectorKind.Scalar, BooleanDataViewType.Instance, false)); | ||
result[colInfo.outputColumnName] = new SchemaShape.Column(colInfo.outputColumnName, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false, new SchemaShape(metadata)); | ||
result[colInfo.outputColumnName] = new SchemaShape.Column(colInfo.outputColumnName, | ||
col.Kind == SchemaShape.Column.VectorKind.VariableVector ? SchemaShape.Column.VectorKind.VariableVector : SchemaShape.Column.VectorKind.Vector, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This is a key-to-vector mapping. Can the input column be vector-valued? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In fact, a vector of keys is one of the most important scenarios (e.g., bag of words). |
||
NumberDataViewType.Single, false, new SchemaShape(metadata)); | ||
} | ||
|
||
return new SchemaShape(result.Values); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#@ TextLoader{ | ||
#@ header+ | ||
#@ sep=tab | ||
#@ col=A:I4:0 | ||
#@ col=B:I4:1-2 | ||
#@ col=C:I4:3-** | ||
#@ col={name=CatA type=U4 src={ min=-1} key=2} | ||
#@ col={name=CatA src={ min=-1 max=0 vector=+}} | ||
#@ col={name=CatB type=U4 src={ min=-1} key=2} | ||
#@ col={name=CatB src={ min=-1 max=1 vector=+}} | ||
#@ col={name=CatC type=U4 src={ min=-1} key=2} | ||
#@ col={name=CatC src={ min=-1 max=0 vector=+}} | ||
#@ col={name=CatD type=U4 src={ min=-1} key=2} | ||
#@ col={name=CatVA type=U4 src={ min=-1 max=0 vector=+} key=3} | ||
#@ col={name=CatVA src={ min=-1 max=1 vector=+}} | ||
#@ col={name=CatVB type=U4 src={ min=-1 max=0 vector=+} key=3} | ||
#@ col={name=CatVB src={ min=-1 max=4 vector=+}} | ||
#@ col={name=CatVC type=U4 src={ min=-1 max=0 vector=+} key=3} | ||
#@ col={name=CatVC src={ min=-1 max=4 vector=+}} | ||
#@ col={name=CatVD type=U4 src={ min=-1 max=0 vector=+} key=3} | ||
#@ col={name=CatVVA type=U4 src={ min=-1 var=+} key=3} | ||
#@ col={name=CatVVA src={ min=-1 max=1 vector=+}} | ||
#@ col={name=CatVVB type=U4 src={ min=-1 var=+} key=3} | ||
#@ col={name=CatVVB src={ min=-1 var=+}} | ||
#@ col={name=CatVVC type=U4 src={ min=-1 var=+} key=3} | ||
#@ col={name=CatVVC src={ min=-1 var=+}} | ||
#@ col={name=CatVVD type=U4 src={ min=-1 var=+} key=3} | ||
#@ } | ||
A "" "" CatA 1 4 CatB Bit2 Bit1 Bit0 CatC 1 4 CatD "" "" 2 3 4 "" "" [0].Bit2 [0].Bit1 [0].Bit0 [1].Bit2 [1].Bit1 [1].Bit0 "" "" [0].2 [0].3 [0].4 [1].2 [1].3 [1].4 "" "" 3 4 2 | ||
1 2 3 3 4 0 1 0 0 0 0 0 0 1 0 0 0 1 1 1 0 0 1 0 0 0 0 0 1 0 1 1 0 0 0 1 0 0 1 0 1 1 1 0 0 1 0 0 0 0 0 1 0 1 1 0 0 0 1 0 0 1 | ||
4 2 4 2 4 3 1 0 1 1 0 0 1 1 0 1 1 0 2 1 0 1 0 2 0 0 0 0 1 0 0 2 1 0 0 0 0 1 0 2 2 1 0 1 1 1 2 1 0 0 1 0 0 0 1 0 0 0 2 1 0 0 0 1 0 1 0 1 0 0 2 1 0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#@ TextLoader{ | ||
#@ header+ | ||
#@ sep=tab | ||
#@ col=A:TX:0 | ||
#@ col=B:TX:1-2 | ||
#@ col=C:TX:3-** | ||
#@ col={name=CatA type=U4 src={ min=-1} key=65536} | ||
#@ col={name=CatA src={ min=-1 max=65534 vector=+}} | ||
#@ col={name=CatB type=U4 src={ min=-1} key=65536} | ||
#@ col={name=CatB src={ min=-1 max=16 vector=+}} | ||
#@ col={name=CatC type=U4 src={ min=-1} key=65536} | ||
#@ col={name=CatC src={ min=-1 max=65534 vector=+}} | ||
#@ col={name=CatD type=U4 src={ min=-1} key=65536} | ||
#@ col={name=CatVA type=U4 src={ min=-1 max=0 vector=+} key=65536} | ||
#@ col={name=CatVA src={ min=-1 max=65534 vector=+}} | ||
#@ col={name=CatVB type=U4 src={ min=-1 max=0 vector=+} key=65536} | ||
#@ col={name=CatVB src={ min=-1 max=34 vector=+}} | ||
#@ col={name=CatVC type=U4 src={ min=-1 max=0 vector=+} key=65536} | ||
#@ col={name=CatVC src={ min=-1 max=131070 vector=+}} | ||
#@ col={name=CatVD type=U4 src={ min=-1 max=0 vector=+} key=65536} | ||
#@ col={name=CatVVA type=U4 src={ min=-1 var=+} key=65536} | ||
#@ col={name=CatVVA src={ min=-1 max=65534 vector=+}} | ||
#@ col={name=CatVVB type=U4 src={ min=-1 var=+} key=65536} | ||
#@ col={name=CatVVB src={ min=-1 var=+}} | ||
#@ col={name=CatVVC type=U4 src={ min=-1 var=+} key=65536} | ||
#@ col={name=CatVVC src={ min=-1 var=+}} | ||
#@ col={name=CatVVD type=U4 src={ min=-1 var=+} key=65536} | ||
#@ } | ||
A 393284 2:CatA 65539:CatB 65558:CatC 131095:CatD | ||
1 2 3 2 3 4 17369 589955 17369:1 65536:17369 65540:1 65545:1 65546:1 65547:1 65548:1 65550:1 65551:1 65554:1 65555:17369 82925:1 131092:17369 131093:45477 131094:61578 176572:1 192673:1 196631:45477 196632:61578 196635:1 196637:1 196638:1 196642:1 196643:1 196645:1 196648:1 196650:1 196653:1 196654:1 196655:1 196656:1 196661:1 196665:1 196667:1 196669:45477 196670:61578 242148:1 323785:1 327743:45477 327744:61578 327745:45477 327746:61578 327747:39452 367200:1 373225:1 389326:1 393284:45477 393285:61578 393286:39452 393289:1 393291:1 393292:1 393296:1 393297:1 393299:1 393302:1 393304:1 393307:1 393308:1 393309:1 393310:1 393315:1 393319:1 393321:1 393325:1 393328:1 393329:1 393331:1 393336:1 393337:1 393338:1 393341:45477 393342:61578 393343:39452 438821:1 520458:1 563868:1 589952:45477 589953:61578 589954:39452 | ||
4 4 5 3 4 5 20750 589955 20750:1 65536:20750 65540:1 65542:1 65546:1 65551:1 65552:1 65553:1 65555:20750 86306:1 131092:20750 131093:20750 131094:23709 151845:1 154804:1 196631:20750 196632:23709 196636:1 196638:1 196642:1 196647:1 196648:1 196649:1 196654:1 196656:1 196657:1 196658:1 196661:1 196664:1 196665:1 196666:1 196668:1 196669:20750 196670:23709 217421:1 285916:1 327743:20750 327744:23709 327745:47483 327746:61549 327747:22463 350211:1 375231:1 389297:1 393284:47483 393285:61549 393286:22463 393289:1 393291:1 393292:1 393293:1 393296:1 393298:1 393299:1 393300:1 393301:1 393303:1 393304:1 393307:1 393308:1 393309:1 393310:1 393316:1 393317:1 393319:1 393320:1 393322:1 393326:1 393328:1 393330:1 393331:1 393332:1 393333:1 393335:1 393336:1 393337:1 393338:1 393339:1 393340:1 393341:47483 393342:61549 393343:22463 440827:1 520429:1 546879:1 589952:47483 589953:61549 589954:22463 |
This file was deleted.
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Geez, the original author sure got confused.