@@ -236,16 +236,17 @@ public class CoNLLUSentence {
236
236
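* Processes a line for the current sentence. Returns true when the line matches none of the comment, multi-word token, token, or EMPTY_LINE patterns (e.g. the blank line that marks the end of a sentence); returns false otherwise.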
237
237
**/
238
238
public boolean processLine (String line ) {
// The line is tested against each pattern in order (COMMENT_LINE, MWT_LINE,
// TOKEN_LINE, EMPTY_LINE); the first pattern that matches the whole line
// decides where the line is stored.
// In this hunk the removed ("-") and added ("+") lines differ only by the
// braces placed around each branch.
239
- if (COMMENT_LINE .matcher (line ).matches ())
239
+ if (COMMENT_LINE .matcher (line ).matches ()) {
240
240
// Comment lines are handed to addSentenceData.
addSentenceData (line );
241
- else if (MWT_LINE .matcher (line ).matches ())
241
+ } else if (MWT_LINE .matcher (line ).matches ()) {
242
242
// Lines matching MWT_LINE are handed to addMWTData.
addMWTData (line );
243
- else if (TOKEN_LINE .matcher (line ).matches ())
243
+ } else if (TOKEN_LINE .matcher (line ).matches ()) {
244
244
// Token lines are stored as raw text in tokenLines.
tokenLines .add (line );
245
- else if (EMPTY_LINE .matcher (line ).matches ())
245
+ } else if (EMPTY_LINE .matcher (line ).matches ()) {
246
246
// A line matching EMPTY_LINE is stored and does NOT end the sentence.
// NOTE(review): EMPTY_LINE presumably denotes CoNLL-U empty nodes rather
// than blank lines -- confirm against the pattern definition.
emptyLines .add (line );
247
- else
247
+ } else {
248
248
// No pattern matched: report end of sentence to the caller.
return true ;
249
+ }
249
250
// The line was consumed by one of the branches above; the sentence continues.
return false ;
250
251
}
251
252
@@ -301,8 +302,14 @@ public List<CoNLLUDocument> readCoNLLUFileCreateCoNLLUDocuments(String filePath)
301
302
for (String line : lines ) {
302
303
// if start of a new doc, reset for a new doc
303
304
if (DOCUMENT_LINE .matcher (line ).matches ()) {
305
+ // since the next sentence gets added to the previous doc
306
+ // (see below), we'll need to remove that
307
+ if (docs .size () > 0 ) {
308
+ docs .get (docs .size () - 1 ).sentences .remove (docs .get (docs .size () - 1 ).sentences .size () - 1 );
309
+ }
310
+ // the new document comes prebuilt with a blank sentence, so,
311
+ // no need to add one here
304
312
docs .add (new CoNLLUDocument ());
305
- docs .get (docs .size () - 1 ).sentences .add (new CoNLLUSentence ());
306
313
}
307
314
// read in current line
308
315
boolean endSentence = docs .get (docs .size () - 1 ).lastSentence ().processLine (line );
0 commit comments