Skip to content

Commit 614e823

Browse files
committed
fix char offset issue
1 parent 346c2b5 commit 614e823

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

src/edu/stanford/nlp/process/stattok/StatTokSent.java

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -225,14 +225,14 @@ private ArrayList<Pair<CoreLabel, String>> splitToken(Pair<CoreLabel, String> to
225225
// if token is not the last one, add class C
226226
if (backwardsPartsIterator.hasPrevious()){
227227
int partLength = part.length();
228-
partToken = factory.makeToken(part, token.originalText(), tokenEndPosition-partLength, partLength);
228+
partToken = factory.makeToken(part, token.originalText(), tokenEndPosition-partLength, partLength+1);
229229
tokenEndPosition = tokenEndPosition-partLength;
230230
partTokenAndClass = new Pair<CoreLabel,String>(partToken, "C");
231231
}
232232
// if last token (first part), add original class (S or T)
233233
else{
234234
int partLength = part.length();
235-
partToken = factory.makeToken(part, token.originalText(), tokenBeginPosition, partLength);
235+
partToken = factory.makeToken(part, token.originalText(), tokenBeginPosition, partLength+1);
236236
partTokenAndClass = new Pair<CoreLabel, String>(partToken, originalClass);
237237
}
238238
splittedTokenAndClass.add(0, partTokenAndClass);
@@ -388,7 +388,7 @@ public List<List<CoreLabel>> tokenize(String text){
388388
//If there hasn't been O class between tokens, create token for last word
389389
if (currentWord != ""){
390390
endToken = i-1;
391-
CoreLabel newToken = factory.makeToken(currentWord, currentWord, beginToken, endToken-beginToken);
391+
CoreLabel newToken = factory.makeToken(currentWord, currentWord, beginToken, endToken-beginToken+1);
392392
Pair<CoreLabel, String> tokenAndClass = new Pair<CoreLabel, String>(newToken,lastBeginChar);
393393
sentenceTokensBase.add(tokenAndClass);
394394
tokensCounter++;
@@ -412,7 +412,7 @@ public List<List<CoreLabel>> tokenize(String text){
412412
//If there hasn't been O class between tokens, create token for last word
413413
if (currentWord != ""){
414414
endToken = i-1;
415-
CoreLabel newToken = factory.makeToken(currentWord, currentWord, beginToken, endToken-beginToken);
415+
CoreLabel newToken = factory.makeToken(currentWord, currentWord, beginToken, endToken-beginToken+1);
416416
Pair<CoreLabel, String> tokenAndClass = new Pair<CoreLabel, String>(newToken,lastBeginChar);
417417
sentenceTokensBase.add(tokenAndClass);
418418
tokensCounter++;
@@ -437,7 +437,7 @@ public List<List<CoreLabel>> tokenize(String text){
437437
if (currentClass.equals("O")){
438438
endToken = i-1;
439439
//Create new token with previous character, add it with its class to list, increment token counter
440-
CoreLabel newToken = factory.makeToken(currentWord, currentWord, beginToken, endToken-beginToken);
440+
CoreLabel newToken = factory.makeToken(currentWord, currentWord, beginToken, endToken-beginToken+1);
441441
Pair<CoreLabel, String> tokenAndClass = new Pair<CoreLabel, String>(newToken,lastBeginChar);
442442
sentenceTokensBase.add(tokenAndClass);
443443
tokensCounter++;
@@ -449,7 +449,7 @@ public List<List<CoreLabel>> tokenize(String text){
449449
//End of text
450450
if (i==(classificationResults.size()-1)) {
451451
endToken = i-1;
452-
CoreLabel newToken = factory.makeToken(currentWord, currentWord, beginToken, endToken-beginToken);
452+
CoreLabel newToken = factory.makeToken(currentWord, currentWord, beginToken, endToken-beginToken+1);
453453
Pair<CoreLabel, String> tokenAndClass = new Pair<CoreLabel, String>(newToken,lastBeginChar);
454454
sentenceTokensBase.add(tokenAndClass);
455455
tokensCounter++;

0 commit comments

Comments
 (0)