|
7 | 7 | import edu.stanford.nlp.ling.CoreAnnotations;
|
8 | 8 | import edu.stanford.nlp.ling.CoreLabel;
|
9 | 9 | import edu.stanford.nlp.ling.IndexedWord;
|
| 10 | +import edu.stanford.nlp.ling.Label; |
10 | 11 | import edu.stanford.nlp.ling.tokensregex.SequenceMatchResult;
|
11 | 12 | import edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher;
|
12 | 13 | import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
|
@@ -1279,6 +1280,26 @@ public TregexHandler(Predicate<Properties> authenticator, Consumer<FinishedReque
|
1279 | 1280 | this.authenticator = authenticator;
|
1280 | 1281 | }
|
1281 | 1282 |
|
| 1283 | + public void setTregexOffsets(JSONOutputter.Writer writer, Tree match) { |
| 1284 | + List<Tree> leaves = match.getLeaves(); |
| 1285 | + Label label = leaves.get(0).label(); |
| 1286 | + if (label instanceof CoreLabel) { |
| 1287 | + CoreLabel core = (CoreLabel) label; |
| 1288 | + writer.set("characterOffsetBegin", core.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)); |
| 1289 | + if (core.containsKey(CoreAnnotations.CodepointOffsetBeginAnnotation.class)) { |
| 1290 | + writer.set("codepointOffsetBegin", core.get(CoreAnnotations.CodepointOffsetBeginAnnotation.class)); |
| 1291 | + } |
| 1292 | + } |
| 1293 | + label = leaves.get(leaves.size() - 1).label(); |
| 1294 | + if (label instanceof CoreLabel) { |
| 1295 | + CoreLabel core = (CoreLabel) label; |
| 1296 | + writer.set("characterOffsetEnd", core.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)); |
| 1297 | + if (core.containsKey(CoreAnnotations.CodepointOffsetEndAnnotation.class)) { |
| 1298 | + writer.set("codepointOffsetEnd", core.get(CoreAnnotations.CodepointOffsetEndAnnotation.class)); |
| 1299 | + } |
| 1300 | + } |
| 1301 | + } |
| 1302 | + |
1282 | 1303 | @Override
|
1283 | 1304 | public void handle(HttpExchange httpExchange) throws IOException {
|
1284 | 1305 | if (onBlockList(httpExchange)) {
|
@@ -1318,17 +1339,21 @@ public void handle(HttpExchange httpExchange) throws IOException {
|
1318 | 1339 | // Run Tregex
|
1319 | 1340 | return Pair.makePair(JSONOutputter.JSONWriter.objectToJSON((docWriter) ->
|
1320 | 1341 | docWriter.set("sentences", doc.get(CoreAnnotations.SentencesAnnotation.class).stream().map(sentence -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer sentWriter) -> {
|
| 1342 | + int sentIndex = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class); |
1321 | 1343 | Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
|
1322 | 1344 | //sentWriter.set("tree", tree.pennString());
|
1323 | 1345 | TregexMatcher matcher = p.matcher(tree);
|
1324 | 1346 |
|
1325 | 1347 | int i = 0;
|
1326 | 1348 | while (matcher.find()) {
|
1327 | 1349 | sentWriter.set(Integer.toString(i++), (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer matchWriter) -> {
|
| 1350 | + matchWriter.set("sentIndex", sentIndex); |
| 1351 | + setTregexOffsets(matchWriter, matcher.getMatch()); |
1328 | 1352 | matchWriter.set("match", matcher.getMatch().pennString());
|
1329 | 1353 | matchWriter.set("spanString", matcher.getMatch().spanString());
|
1330 | 1354 | matchWriter.set("namedNodes", matcher.getNodeNames().stream().map(nodeName -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer namedNodeWriter) ->
|
1331 | 1355 | namedNodeWriter.set(nodeName, (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer namedNodeSubWriter) -> {
|
| 1356 | + setTregexOffsets(namedNodeSubWriter, matcher.getNode(nodeName)); |
1332 | 1357 | namedNodeSubWriter.set("match", matcher.getNode(nodeName).pennString());
|
1333 | 1358 | namedNodeSubWriter.set("spanString", matcher.getNode(nodeName).spanString());
|
1334 | 1359 | })
|
|
0 commit comments