Skip to content

Issue 665 #687

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
* Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
* http://cogcomp.cs.illinois.edu/
*/
/**
*
*/

package edu.illinois.cs.cogcomp.core.datastructures.textannotation;

import java.util.ArrayList;
Expand Down Expand Up @@ -60,6 +58,14 @@ public SpanLabelView(String viewName, String viewGenerator, TextAnnotation text,

@Override
public void addConstituent(Constituent constituent) {

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

small improvement: please move start/end assignments inside if{} block, as they aren't being used otherwise.

if (!allowOverlappingSpans) {
int start = constituent.getStartSpan();
int end = constituent.getEndSpan();
if (this.getConstituentsCoveringSpan(start, end).size() != 0)
throw new IllegalArgumentException("Span [" + start + ", " + end + "] already labeled.");
}

super.addConstituent(constituent);

// this sort is grossly inefficient when appending contiguous tokens one at a time.
Expand Down Expand Up @@ -95,9 +101,6 @@ public Constituent addSpanLabel(int start, int end, String label, double score)
new Constituent(label, score, this.getViewName(), this.getTextAnnotation(), start,
end);

if (!allowOverlappingSpans && this.getConstituentsCoveringSpan(start, end).size() != 0)
throw new IllegalArgumentException("Span [" + start + ", " + end + "] already labeled.");

this.addConstituent(c);

return c;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,19 @@ public TokenLabelView(String viewName, TextAnnotation text) {
this(viewName, viewName + "-annotator", text, 1.0);
}

/**
 * Creates a view using the default generator name ({@code viewName + "-annotator"}) and a
 * score of 1.0, with an explicit overlap policy.
 *
 * @param viewName name of the view; also used to derive the generator name
 * @param text the text annotation this view belongs to
 * @param allowOverlappingSpans forwarded unchanged to the five-argument constructor;
 *        presumably controls whether constituents with overlapping spans may be added
 *        (NOTE(review): confirm against the superclass)
 */
public TokenLabelView(String viewName, TextAnnotation text, boolean allowOverlappingSpans) {
this(viewName, viewName + "-annotator", text, 1.0, allowOverlappingSpans);
}

/**
 * Creates a view by forwarding all arguments unchanged to the four-argument superclass
 * constructor. No overlap policy is passed here, so the superclass default applies.
 *
 * @param viewName name of the view
 * @param viewGenerator name of the component that generates this view
 * @param text the text annotation this view belongs to
 * @param score score associated with the view
 */
public TokenLabelView(String viewName, String viewGenerator, TextAnnotation text, double score) {
super(viewName, viewGenerator, text, score);
}

/**
 * Creates a view by forwarding all arguments, including the overlap policy, unchanged to the
 * five-argument superclass constructor.
 *
 * @param viewName name of the view
 * @param viewGenerator name of the component that generates this view
 * @param text the text annotation this view belongs to
 * @param score score associated with the view
 * @param allowOverlappingSpans passed through to the superclass; presumably controls whether
 *        constituents with overlapping spans may be added (NOTE(review): confirm against the
 *        superclass)
 */
public TokenLabelView(String viewName, String viewGenerator, TextAnnotation text, double score,
boolean allowOverlappingSpans) {
super(viewName, viewGenerator, text, score, allowOverlappingSpans);
}

/**
* Adds a label to a token and returns the newly created constituent.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package edu.illinois.cs.cogcomp.core.datastructures.textannotation;

import edu.illinois.cs.cogcomp.annotation.BasicTextAnnotationBuilder;
import edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder;
import edu.illinois.cs.cogcomp.core.datastructures.IntPair;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
import edu.illinois.cs.cogcomp.nlp.tokenizer.Tokenizer;
import org.junit.Before;
import org.junit.Test;

import java.util.ArrayList;
import java.util.List;

/**
 * Tests the overlap policy applied by {@code SpanLabelView.addConstituent(Constituent)}: a view
 * created with {@code allowOverlappingSpans == true} must store overlapping constituents, and a
 * view created with {@code allowOverlappingSpans == false} must reject the overlapping one with
 * an {@link IllegalArgumentException}.
 *
 * <p>Checks throw {@link AssertionError} explicitly rather than relying on the {@code assert}
 * keyword, so they are still evaluated when the JVM runs without {@code -ea}.
 */
public class SpanLabelViewTest {
    private SpanLabelView overlappingSpansView;
    private SpanLabelView noOverlappingSpansView;
    private TextAnnotation ta;
    private Constituent baseConstituent;
    private Constituent overlappingConstituent;

    private final String viewName = "VIEWNAME";
    private final String viewGenerator = "VIEW-GENERATOR";
    private final String text = "This is a test string; do not pay it any mind.";
    private final String corpusId = "TEST";
    private final String textId = "ID";

    private final double score = 42.0;

    // Token spans 0-5 and 2-6 share tokens whether or not the end index is inclusive,
    // so the two constituents built from them overlap.
    private final int baseStart = 0;
    private final int baseEnd = 5;
    private final int overStart = 2;
    private final int overEnd = 6;

    /**
     * Builds a whitespace tokenization of {@code text}, treated as a single sentence.
     *
     * <p>Tokens come from splitting on individual whitespace characters. Character offsets are
     * computed by scanning the text: each token ends at the index of the next whitespace
     * character and the final token ends at {@code text.length()}. Intended for fixture text
     * whose tokens are separated by exactly one whitespace character.
     *
     * @param text the raw text to tokenize
     * @return the tokens, their character offsets, and a single sentence end at the token count
     */
    private Tokenizer.Tokenization getTokenization(String text) {
        String[] tokens = text.split("\\s");

        List<IntPair> characterOffsets = new ArrayList<>();
        int tokenStart = 0;
        for (int i = 0; i < text.length(); i++) {
            if (Character.isWhitespace(text.charAt(i))) {
                characterOffsets.add(new IntPair(tokenStart, i));
                tokenStart = i + 1;
            }
        }
        // The last token has no trailing whitespace, so close it at the end of the text.
        characterOffsets.add(new IntPair(tokenStart, text.length()));

        IntPair[] charOffsetArray = characterOffsets.toArray(new IntPair[0]);
        int[] sentenceEndArray = {tokens.length};
        return new Tokenizer.Tokenization(tokens, charOffsetArray, sentenceEndArray);
    }

    /** Fails the current test with {@code message} unless {@code condition} holds. */
    private static void check(boolean condition, String message) {
        if (!condition) {
            throw new AssertionError(message);
        }
    }

    /** Creates a fresh text annotation, one view per overlap policy, and the two constituents. */
    @Before
    public void init() {
        TextAnnotationBuilder taBuilder = new BasicTextAnnotationBuilder();
        ta = taBuilder.createTextAnnotation(corpusId, textId, text, getTokenization(text));

        overlappingSpansView = new SpanLabelView(viewName, viewGenerator, ta, score, true);
        noOverlappingSpansView = new SpanLabelView(viewName, viewGenerator, ta, score, false);

        baseConstituent = new Constituent("BASE", score, viewName, ta, baseStart, baseEnd);
        overlappingConstituent =
                new Constituent("OVER", score, viewName, ta, overStart, overEnd);
    }

    /** A view that allows overlap must keep both constituents with their spans intact. */
    @Test
    public void testOverlappingSpans() {
        overlappingSpansView.addConstituent(baseConstituent);
        overlappingSpansView.addConstituent(overlappingConstituent);

        int seen = 0;
        for (Constituent c : overlappingSpansView.getConstituents()) {
            seen++;
            boolean isBase = "BASE".equals(c.getLabel());
            int expectedStart = isBase ? baseStart : overStart;
            int expectedEnd = isBase ? baseEnd : overEnd;
            check(c.getStartSpan() == expectedStart,
                    c.getLabel() + ": expected start " + expectedStart + " but was "
                            + c.getStartSpan());
            check(c.getEndSpan() == expectedEnd,
                    c.getLabel() + ": expected end " + expectedEnd + " but was "
                            + c.getEndSpan());
        }
        // Without this count the loop above would pass vacuously on an empty view.
        check(seen == 2, "expected 2 constituents in the view but found " + seen);
    }

    /** A view that forbids overlap must reject the second, overlapping constituent. */
    @Test
    public void testNoOverlappingSpans() {
        // The first insertion is legitimate; an exception here is a real failure and propagates.
        noOverlappingSpansView.addConstituent(baseConstituent);
        try {
            noOverlappingSpansView.addConstituent(overlappingConstituent);
        } catch (IllegalArgumentException expected) {
            return;
        }
        throw new AssertionError(
                "adding an overlapping constituent should throw IllegalArgumentException");
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ public List<XmlTextAnnotation> getAnnotationsFromFile(List<Path> corpusFileListE
TextAnnotation ta = sourceTa.getTextAnnotation();
SpanLabelView tokens = (SpanLabelView) ta.getView(ViewNames.TOKENS);
compileOffsets(tokens);
SpanLabelView nerView = new SpanLabelView(getMentionViewName(), NAME, ta, 1.0, false);
SpanLabelView nerView = new SpanLabelView(getMentionViewName(), NAME, ta, 1.0, true);

// now pull all mentions we deal with. Start from file list index 1, as index 0 was source
// text
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public void initialize(ResourceManager rm) {
public void addView(TextAnnotation ta) throws AnnotatorException {
Annotation document = new Annotation(ta.text);
pipeline.annotate(document);
TokenLabelView vu = new TokenLabelView(viewName, ta);
TokenLabelView vu = new TokenLabelView(viewName, ta, true);

for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
Expand Down
2 changes: 1 addition & 1 deletion md/src/main/java/org/cogcomp/md/BIOCombinedReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ private List<Constituent> getTokensFromTAs(){
mentionViewName = ViewNames.MENTION_ERE;
}
View mentionView = ta.getView(mentionViewName);
View bioView = new SpanLabelView("BIO", BIOReader.class.getCanonicalName(), ta, 1.0f);
View bioView = new SpanLabelView("BIO", BIOReader.class.getCanonicalName(), ta, 1.0f, true);
String[] token2tags = new String[tokenView.getConstituents().size()];
for (int i = 0; i < token2tags.length; i++){
token2tags[i] = "O";
Expand Down
2 changes: 1 addition & 1 deletion md/src/main/java/org/cogcomp/md/BIOReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ else if (_mode.equals("ColumnFormat")){
for (TextAnnotation ta : taList){
View tokenView = ta.getView(ViewNames.TOKENS);
View mentionView = ta.getView(mentionViewName);
View bioView = new SpanLabelView("BIO", BIOReader.class.getCanonicalName(), ta, 1.0f);
View bioView = new SpanLabelView("BIO", BIOReader.class.getCanonicalName(), ta, 1.0f, true);
String[] token2tags = new String[tokenView.getConstituents().size()];
for (int i = 0; i < token2tags.length; i++){
token2tags[i] = "O";
Expand Down
2 changes: 1 addition & 1 deletion md/src/main/java/org/cogcomp/md/ColumnFormatReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ public TextAnnotation readSingleFile(String file){
tokens.add(curSentenceArr);
}
TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(tokens);
SpanLabelView mentionView = new SpanLabelView("MENTIONS", this.getClass().getCanonicalName(), ta, 1.0f);
SpanLabelView mentionView = new SpanLabelView("MENTIONS", this.getClass().getCanonicalName(), ta, 1.0f, true);
if (mentionTypes.size() != mentions.size()){
System.out.println("ERROR");
}
Expand Down
2 changes: 1 addition & 1 deletion md/src/main/java/org/cogcomp/md/MentionAnnotator.java
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ public void addView(TextAnnotation ta) throws AnnotatorException{
throw new AnnotatorException("Missing required view POS");
}
View mentionView = new SpanLabelView(ViewNames.MENTION, MentionAnnotator.class.getCanonicalName(), ta, 1.0f, true);
View bioView = new SpanLabelView("BIO", BIOReader.class.getCanonicalName(), ta, 1.0f);
View bioView = new SpanLabelView("BIO", BIOReader.class.getCanonicalName(), ta, 1.0f, true);
View tokenView = ta.getView(ViewNames.TOKENS);
for (int i = tokenView.getStartSpan(); i < tokenView.getEndSpan(); i++){
Constituent currentToken = tokenView.getConstituentsCoveringToken(i).get(0).cloneForNewView("BIO");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ public TextAnnotation createTextAnnotation(String corpusId, String textId, Strin
TextAnnotation ta = new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(),
tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
SpanLabelView view =
new SpanLabelView(ViewNames.SENTENCE, NAME, ta, 1.0);
new SpanLabelView(ViewNames.SENTENCE, NAME, ta, 1);

int start = 0;
for (int s : tokenization.getSentenceEndTokenIndexes()) {
Expand Down