Skip to content

Commit 40cb460

Browse files
committed
Oops, need to set the SentenceIndexAnnotation on the empty tokens
1 parent 54041db commit 40cb460

File tree

2 files changed

+6
-0
lines changed

2 files changed

+6
-0
lines changed

itest/src/edu/stanford/nlp/pipeline/CoNLLUReaderITest.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -353,6 +353,7 @@ public void testReadingInEmpties() throws ClassNotFoundException, IOException {
353 353
CoreLabel empty = emptyTokens.get(0);
354 354
assertEquals(8, empty.index());
355 355
assertEquals(Integer.valueOf(1), empty.get(CoreAnnotations.EmptyIndexAnnotation.class));
356+
assertEquals(0, empty.sentIndex());
356 357
assertEquals("reported", empty.value());
357 358
}
358 359

src/edu/stanford/nlp/pipeline/CoNLLUReader.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -348,6 +348,11 @@ public Annotation convertCoNLLUDocumentToAnnotation(CoNLLUDocument doc) {
348 348
tokens.add(token);
349 349
documentIdx++;
350 350
}
351+
if (sentence.containsKey(CoreAnnotations.EmptyTokensAnnotation.class)) {
352+
for (CoreLabel token : sentence.get(CoreAnnotations.EmptyTokensAnnotation.class)) {
353+
token.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIdx);
354+
}
355+
}
351 356
sentenceIdx++;
352 357
}
353 358
// make sure to set docText AFTER all the above processing

0 commit comments

Comments
 (0)