Skip to content

Commit 3d2c5d4

Browse files
committed
Fix the sentence fiddling when a document boundary is reached
1 parent 8baa096 commit 3d2c5d4

File tree

1 file changed

+13
-6
lines changed

1 file changed

+13
-6
lines changed

src/edu/stanford/nlp/pipeline/CoNLLUReader.java

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -236,16 +236,17 @@ public class CoNLLUSentence {
236236
* Process line for current sentence. Return true if processing empty line (indicating sentence end)
237237
**/
238238
public boolean processLine(String line) {
239-
if (COMMENT_LINE.matcher(line).matches())
239+
if (COMMENT_LINE.matcher(line).matches()) {
240240
addSentenceData(line);
241-
else if (MWT_LINE.matcher(line).matches())
241+
} else if (MWT_LINE.matcher(line).matches()) {
242242
addMWTData(line);
243-
else if (TOKEN_LINE.matcher(line).matches())
243+
} else if (TOKEN_LINE.matcher(line).matches()) {
244244
tokenLines.add(line);
245-
else if (EMPTY_LINE.matcher(line).matches())
245+
} else if (EMPTY_LINE.matcher(line).matches()) {
246246
emptyLines.add(line);
247-
else
247+
} else {
248248
return true;
249+
}
249250
return false;
250251
}
251252

@@ -301,8 +302,14 @@ public List<CoNLLUDocument> readCoNLLUFileCreateCoNLLUDocuments(String filePath)
301302
for (String line : lines) {
302303
// if start of a new doc, reset for a new doc
303304
if (DOCUMENT_LINE.matcher(line).matches()) {
305+
// since the next sentence gets added to the previous doc
306+
// (see below), we'll need to remove that
307+
if (docs.size() > 0) {
308+
docs.get(docs.size() - 1).sentences.remove(docs.get(docs.size() - 1).sentences.size() - 1);
309+
}
310+
// the new document comes prebuilt with a blank sentence, so,
311+
// no need to add one here
304312
docs.add(new CoNLLUDocument());
305-
docs.get(docs.size() - 1).sentences.add(new CoNLLUSentence());
306313
}
307314
// read in current line
308315
boolean endSentence = docs.get(docs.size() - 1).lastSentence().processLine(line);

0 commit comments

Comments
 (0)