CogComp · ChaseDuncan · Aug 30, 2018 · Aug 30, 2018 · Sep 4, 2018 · Sep 4, 2018
diff --git a/...c/main/java/edu/illinois/cs/cogcomp/core/datastructures/textannotation/SpanLabelView.java b/...c/main/java/edu/illinois/cs/cogcomp/core/datastructures/textannotation/SpanLabelView.java
@@ -5,9 +5,7 @@
  * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
  * http://cogcomp.cs.illinois.edu/
  */
-/**
- *
- */
+
 package edu.illinois.cs.cogcomp.core.datastructures.textannotation;
 
 import java.util.ArrayList;
@@ -60,6 +58,14 @@ public SpanLabelView(String viewName, String viewGenerator, TextAnnotation text,
 
     @Override
     public void addConstituent(Constituent constituent) {
+        // When overlaps are disallowed, reject a constituent whose span is already covered.
+        if (!allowOverlappingSpans) {
+            final int spanStart = constituent.getStartSpan();
+            final int spanEnd = constituent.getEndSpan();
+            if (getConstituentsCoveringSpan(spanStart, spanEnd).size() > 0) {
+                throw new IllegalArgumentException("Span [" + spanStart + ", " + spanEnd + "] already labeled.");
+            }
+        }
         super.addConstituent(constituent);
 
         // this sort is grossly inefficient when appending contiguous tokens one at a time. 
@@ -95,9 +101,6 @@ public Constituent addSpanLabel(int start, int end, String label, double score)
                 new Constituent(label, score, this.getViewName(), this.getTextAnnotation(), start,
                         end);
 
-        if (!allowOverlappingSpans && this.getConstituentsCoveringSpan(start, end).size() != 0)
-            throw new IllegalArgumentException("Span [" + start + ", " + end + "] already labeled.");
-
         this.addConstituent(c);
 
         return c;

diff --git a/.../main/java/edu/illinois/cs/cogcomp/core/datastructures/textannotation/TokenLabelView.java b/.../main/java/edu/illinois/cs/cogcomp/core/datastructures/textannotation/TokenLabelView.java
@@ -31,10 +31,27 @@ public TokenLabelView(String viewName, TextAnnotation text) {
         this(viewName, viewName + "-annotator", text, 1.0);
     }
 
+    /**
+     * Creates a view with generator name {@code viewName + "-annotator"} and score 1.0,
+     * forwarding {@code allowOverlappingSpans} to the five-argument constructor.
+     */
+    public TokenLabelView(String viewName, TextAnnotation text, boolean allowOverlappingSpans) {
+        this(viewName, viewName + "-annotator", text, 1.0, allowOverlappingSpans);
+    }
+
     public TokenLabelView(String viewName, String viewGenerator, TextAnnotation text, double score) {
         super(viewName, viewGenerator, text, score);
     }
 
+    /**
+     * Creates a view by passing every argument, including {@code allowOverlappingSpans},
+     * unchanged to the superclass constructor.
+     */
+    public TokenLabelView(String viewName, String viewGenerator, TextAnnotation text, double score,
+                          boolean allowOverlappingSpans) {
+        super(viewName, viewGenerator, text, score, allowOverlappingSpans);
+    }
+
     /**
      * Adds a label to a token and returns the newly created constituent.
      *

diff --git a/...st/java/edu/illinois/cs/cogcomp/core/datastructures/textannotation/SpanLabelViewTest.java b/...st/java/edu/illinois/cs/cogcomp/core/datastructures/textannotation/SpanLabelViewTest.java
@@ -0,0 +1,108 @@
+package edu.illinois.cs.cogcomp.core.datastructures.textannotation;
+
+import edu.illinois.cs.cogcomp.annotation.BasicTextAnnotationBuilder;
+import edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder;
+import edu.illinois.cs.cogcomp.core.datastructures.IntPair;
+import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent;
+import edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView;
+import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
+import edu.illinois.cs.cogcomp.nlp.tokenizer.Tokenizer;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+/**
+ * Tests that {@link SpanLabelView#addConstituent(Constituent)} honors the view's
+ * {@code allowOverlappingSpans} setting: both constituents are kept when the flag is true, and the
+ * second one is rejected with an {@link IllegalArgumentException} when it is false.
+ */
+public class SpanLabelViewTest {
+    private static final String VIEW_NAME = "VIEWNAME";
+    private static final String VIEW_GENERATOR = "VIEW-GENERATOR";
+    private static final String TEXT = "This is a test string; do not pay it any mind.";
+    private static final String CORPUS_ID = "TEST";
+    private static final String TEXT_ID = "ID";
+
+    private static final double SCORE = 42.0;
+    // Span bounds of the two constituents; the second span starts inside the first.
+    private static final int BASE_START = 0;
+    private static final int BASE_END = 5;
+    private static final int OVER_START = 2;
+    private static final int OVER_END = 6;
+
+    private SpanLabelView overlappingSpansView;
+    private SpanLabelView noOverlappingSpansView;
+    private TextAnnotation ta;
+    private Constituent baseConstituent;
+    private Constituent overlappingConstituent;
+
+    /**
+     * Builds a whitespace tokenization of {@code text}: one token per {@code split("\\s")} piece,
+     * character offsets cut at each whitespace character, and a single sentence-end index equal
+     * to the token count.
+     */
+    private static Tokenizer.Tokenization getTokenization(String text) {
+        String[] tokens = text.split("\\s");
+        List<IntPair> characterOffsets = new ArrayList<>();
+
+        int tokenBegin = 0;
+        for (int i = 0; i < text.length(); i++) {
+            if (Character.isWhitespace(text.charAt(i))) {
+                characterOffsets.add(new IntPair(tokenBegin, i));
+                tokenBegin = i + 1;
+            }
+        }
+        // The final token is terminated by the end of the text rather than by whitespace.
+        characterOffsets.add(new IntPair(tokenBegin, text.length()));
+
+        IntPair[] charOffsetArray = characterOffsets.toArray(new IntPair[0]);
+        int[] sentenceEndArray = {tokens.length};
+        return new Tokenizer.Tokenization(tokens, charOffsetArray, sentenceEndArray);
+    }
+
+    @Before
+    public void init() {
+        TextAnnotationBuilder taBuilder = new BasicTextAnnotationBuilder();
+        ta = taBuilder.createTextAnnotation(CORPUS_ID, TEXT_ID, TEXT, getTokenization(TEXT));
+        overlappingSpansView = new SpanLabelView(VIEW_NAME, VIEW_GENERATOR, ta, SCORE, true);
+        noOverlappingSpansView = new SpanLabelView(VIEW_NAME, VIEW_GENERATOR, ta, SCORE, false);
+
+        baseConstituent = new Constituent("BASE", SCORE, VIEW_NAME, ta, BASE_START, BASE_END);
+        overlappingConstituent = new Constituent("OVER", SCORE, VIEW_NAME, ta, OVER_START, OVER_END);
+    }
+
+    @Test
+    public void testOverlappingSpans() {
+        overlappingSpansView.addConstituent(baseConstituent);
+        overlappingSpansView.addConstituent(overlappingConstituent);
+
+        // Guard against a vacuous pass: both constituents must actually be stored.
+        assertEquals(2, overlappingSpansView.getConstituents().size());
+        for (Constituent c : overlappingSpansView.getConstituents()) {
+            if (c.getLabel().equals("BASE")) {
+                assertEquals(BASE_START, c.getStartSpan());
+                assertEquals(BASE_END, c.getEndSpan());
+            } else {
+                assertEquals(OVER_START, c.getStartSpan());
+                assertEquals(OVER_END, c.getEndSpan());
+            }
+        }
+    }
+
+    @Test
+    public void testNoOverlappingSpans() {
+        // The first span must be accepted; only the overlapping one may be rejected.
+        noOverlappingSpansView.addConstituent(baseConstituent);
+        try {
+            noOverlappingSpansView.addConstituent(overlappingConstituent);
+            fail("Expected IllegalArgumentException for an overlapping span");
+        } catch (IllegalArgumentException expected) {
+            // expected: overlapping spans are disallowed in this view
+        }
+    }
+}
diff --git a/...aders/src/main/java/edu/illinois/cs/cogcomp/nlp/corpusreaders/ereReader/ERENerReader.java b/...aders/src/main/java/edu/illinois/cs/cogcomp/nlp/corpusreaders/ereReader/ERENerReader.java
@@ -173,7 +173,7 @@ public List<XmlTextAnnotation> getAnnotationsFromFile(List<Path> corpusFileListE
         TextAnnotation ta = sourceTa.getTextAnnotation();
         SpanLabelView tokens = (SpanLabelView) ta.getView(ViewNames.TOKENS);
         compileOffsets(tokens);
-        SpanLabelView nerView = new SpanLabelView(getMentionViewName(), NAME, ta, 1.0, false);
+        SpanLabelView nerView = new SpanLabelView(getMentionViewName(), NAME, ta, 1.0, true);
 
         // now pull all mentions we deal with. Start from file list index 1, as index 0 was source
         // text

diff --git a/....3.1/src/main/java/edu/illinois/cs/cogcomp/pipeline/handlers/StanfordTrueCaseHandler.java b/....3.1/src/main/java/edu/illinois/cs/cogcomp/pipeline/handlers/StanfordTrueCaseHandler.java
@@ -54,7 +54,7 @@ public void initialize(ResourceManager rm) {
     public void addView(TextAnnotation ta) throws AnnotatorException {
         Annotation document = new Annotation(ta.text);
         pipeline.annotate(document);
-        TokenLabelView vu = new TokenLabelView(viewName, ta);
+        TokenLabelView vu = new TokenLabelView(viewName, ta, true);
 
         for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
             for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {

diff --git a/md/src/main/java/org/cogcomp/md/BIOCombinedReader.java b/md/src/main/java/org/cogcomp/md/BIOCombinedReader.java
@@ -172,7 +172,7 @@ private List<Constituent> getTokensFromTAs(){
                 mentionViewName = ViewNames.MENTION_ERE;
             }
             View mentionView = ta.getView(mentionViewName);
-            View bioView = new SpanLabelView("BIO", BIOReader.class.getCanonicalName(), ta, 1.0f);
+            View bioView = new SpanLabelView("BIO", BIOReader.class.getCanonicalName(), ta, 1.0f, true);
             String[] token2tags = new String[tokenView.getConstituents().size()];
             for (int i = 0; i < token2tags.length; i++){
                 token2tags[i] = "O";

diff --git a/md/src/main/java/org/cogcomp/md/BIOReader.java b/md/src/main/java/org/cogcomp/md/BIOReader.java
@@ -180,7 +180,7 @@ else if (_mode.equals("ColumnFormat")){
         for (TextAnnotation ta : taList){
             View tokenView = ta.getView(ViewNames.TOKENS);
             View mentionView = ta.getView(mentionViewName);
-            View bioView = new SpanLabelView("BIO", BIOReader.class.getCanonicalName(), ta, 1.0f);
+            View bioView = new SpanLabelView("BIO", BIOReader.class.getCanonicalName(), ta, 1.0f, true);
             String[] token2tags = new String[tokenView.getConstituents().size()];
             for (int i = 0; i < token2tags.length; i++){
                 token2tags[i] = "O";

diff --git a/md/src/main/java/org/cogcomp/md/ColumnFormatReader.java b/md/src/main/java/org/cogcomp/md/ColumnFormatReader.java
@@ -118,7 +118,7 @@ public TextAnnotation readSingleFile(String file){
             tokens.add(curSentenceArr);
         }
         TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(tokens);
-        SpanLabelView mentionView = new SpanLabelView("MENTIONS", this.getClass().getCanonicalName(), ta, 1.0f);
+        SpanLabelView mentionView = new SpanLabelView("MENTIONS", this.getClass().getCanonicalName(), ta, 1.0f, true);
         if (mentionTypes.size() != mentions.size()){
             System.out.println("ERROR");
         }

diff --git a/md/src/main/java/org/cogcomp/md/MentionAnnotator.java b/md/src/main/java/org/cogcomp/md/MentionAnnotator.java
@@ -204,7 +204,7 @@ public void addView(TextAnnotation ta) throws AnnotatorException{
             throw new AnnotatorException("Missing required view POS");
         }
         View mentionView = new SpanLabelView(ViewNames.MENTION, MentionAnnotator.class.getCanonicalName(), ta, 1.0f, true);
-        View bioView = new SpanLabelView("BIO", BIOReader.class.getCanonicalName(), ta, 1.0f);
+        View bioView = new SpanLabelView("BIO", BIOReader.class.getCanonicalName(), ta, 1.0f, true);
         View tokenView = ta.getView(ViewNames.TOKENS);
         for (int i = tokenView.getStartSpan(); i < tokenView.getEndSpan(); i++){
             Constituent currentToken = tokenView.getConstituentsCoveringToken(i).get(0).cloneForNewView("BIO");

diff --git a/...zer/src/main/java/edu/illinois/cs/cogcomp/nlp/utility/TokenizerTextAnnotationBuilder.java b/...zer/src/main/java/edu/illinois/cs/cogcomp/nlp/utility/TokenizerTextAnnotationBuilder.java
@@ -138,7 +138,7 @@ public TextAnnotation createTextAnnotation(String corpusId, String textId, Strin
         TextAnnotation ta = new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(),
                 tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
         SpanLabelView view =
-                new SpanLabelView(ViewNames.SENTENCE, NAME, ta, 1.0);
+                new SpanLabelView(ViewNames.SENTENCE, NAME, ta, 1);
 
         int start = 0;
         for (int s : tokenization.getSentenceEndTokenIndexes()) {