1
- /**
2
- * This file is part of FNLP (formerly FudanNLP).
3
- *
4
- * FNLP is free software: you can redistribute it and/or modify
5
- * it under the terms of the GNU Lesser General Public License as published by
6
- * the Free Software Foundation, either version 3 of the License, or
7
- * (at your option) any later version.
8
- *
9
- * FNLP is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
- * GNU Lesser General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU General Public License
15
- * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16
- *
17
- * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18
- */
19
-
1
+ /**
2
+ * This file is part of FNLP (formerly FudanNLP).
3
+ *
4
+ * FNLP is free software: you can redistribute it and/or modify
5
+ * it under the terms of the GNU Lesser General Public License as published by
6
+ * the Free Software Foundation, either version 3 of the License, or
7
+ * (at your option) any later version.
8
+ *
9
+ * FNLP is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ * GNU Lesser General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU General Public License
15
+ * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16
+ *
17
+ * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18
+ */
19
+
20
20
package org .fnlp .app .lucene ;
21
21
22
22
import java .io .IOException ;
@@ -81,8 +81,9 @@ public boolean incrementToken() throws IOException {
81
81
posBuffer = Arrays .asList (p );
82
82
tokenIter = tokenBuffer .iterator ();
83
83
posIter = posBuffer .iterator ();
84
- idx = 0 ;
85
- /*
84
+ // idx = 0;
85
+ idx = tokStart ;
86
+ /*
86
87
* it should not be possible to have a sentence with 0 words, check just in case.
87
88
* returning EOS isn't the best either, but it's the behavior of the original code.
88
89
*/
@@ -103,7 +104,7 @@ public boolean incrementToken() throws IOException {
103
104
if (hasIllegalOffsets ) {
104
105
offsetAtt .setOffset (tokStart , tokEnd );
105
106
} else {
106
- offsetAtt .setOffset (idx , end - 1 );
107
+ offsetAtt .setOffset (idx , end );
107
108
}
108
109
idx = end ;
109
110
typeAtt .setType ("word" );
0 commit comments