Skip to content

Commit b95cd18

Browse files
AngledLuffa, Stanford NLP
authored and committed
Add character offsets to tregex responses in the server
1 parent 48b8dd4 commit b95cd18

File tree

1 file changed

+25
-0
lines changed

1 file changed

+25
-0
lines changed

src/edu/stanford/nlp/pipeline/StanfordCoreNLPServer.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import edu.stanford.nlp.ling.CoreAnnotations;
88
import edu.stanford.nlp.ling.CoreLabel;
99
import edu.stanford.nlp.ling.IndexedWord;
10+
import edu.stanford.nlp.ling.Label;
1011
import edu.stanford.nlp.ling.tokensregex.SequenceMatchResult;
1112
import edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher;
1213
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
@@ -1279,6 +1280,26 @@ public TregexHandler(Predicate<Properties> authenticator, Consumer<FinishedReque
12791280
this.authenticator = authenticator;
12801281
}
12811282

1283+
public void setTregexOffsets(JSONOutputter.Writer writer, Tree match) {
1284+
List<Tree> leaves = match.getLeaves();
1285+
Label label = leaves.get(0).label();
1286+
if (label instanceof CoreLabel) {
1287+
CoreLabel core = (CoreLabel) label;
1288+
writer.set("characterOffsetBegin", core.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
1289+
if (core.containsKey(CoreAnnotations.CodepointOffsetBeginAnnotation.class)) {
1290+
writer.set("codepointOffsetBegin", core.get(CoreAnnotations.CodepointOffsetBeginAnnotation.class));
1291+
}
1292+
}
1293+
label = leaves.get(leaves.size() - 1).label();
1294+
if (label instanceof CoreLabel) {
1295+
CoreLabel core = (CoreLabel) label;
1296+
writer.set("characterOffsetEnd", core.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
1297+
if (core.containsKey(CoreAnnotations.CodepointOffsetEndAnnotation.class)) {
1298+
writer.set("codepointOffsetEnd", core.get(CoreAnnotations.CodepointOffsetEndAnnotation.class));
1299+
}
1300+
}
1301+
}
1302+
12821303
@Override
12831304
public void handle(HttpExchange httpExchange) throws IOException {
12841305
if (onBlockList(httpExchange)) {
@@ -1318,17 +1339,21 @@ public void handle(HttpExchange httpExchange) throws IOException {
13181339
// Run Tregex
13191340
return Pair.makePair(JSONOutputter.JSONWriter.objectToJSON((docWriter) ->
13201341
docWriter.set("sentences", doc.get(CoreAnnotations.SentencesAnnotation.class).stream().map(sentence -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer sentWriter) -> {
1342+
int sentIndex = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class);
13211343
Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
13221344
//sentWriter.set("tree", tree.pennString());
13231345
TregexMatcher matcher = p.matcher(tree);
13241346

13251347
int i = 0;
13261348
while (matcher.find()) {
13271349
sentWriter.set(Integer.toString(i++), (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer matchWriter) -> {
1350+
matchWriter.set("sentIndex", sentIndex);
1351+
setTregexOffsets(matchWriter, matcher.getMatch());
13281352
matchWriter.set("match", matcher.getMatch().pennString());
13291353
matchWriter.set("spanString", matcher.getMatch().spanString());
13301354
matchWriter.set("namedNodes", matcher.getNodeNames().stream().map(nodeName -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer namedNodeWriter) ->
13311355
namedNodeWriter.set(nodeName, (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer namedNodeSubWriter) -> {
1356+
setTregexOffsets(namedNodeSubWriter, matcher.getNode(nodeName));
13321357
namedNodeSubWriter.set("match", matcher.getNode(nodeName).pennString());
13331358
namedNodeSubWriter.set("spanString", matcher.getNode(nodeName).spanString());
13341359
})

0 commit comments

Comments
 (0)