Skip to content

Commit 723f20d

Browse files
committed
Put comments from the CoNLLU on the annotation
1 parent e2acb52 commit 723f20d

File tree

3 files changed

+19
-3
lines changed

3 files changed

+19
-3
lines changed

src/edu/stanford/nlp/ling/CoreAnnotations.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,16 @@ public Class<String> getType() {
522522
}
523523
}
524524

525+
/**
526+
* Comment lines attached to a sentence, such as the {@code #} lines that accompany a sentence in a CoNLL-U file.
527+
*/
528+
public static class CommentsAnnotation implements CoreAnnotation<List<String>> {
529+
@Override
530+
public Class<List<String>> getType() {
531+
// List.class has the raw type Class<List>; the unchecked cast yields the Class<List<String>> this method must return
return ErasureUtils.uncheckedCast(List.class);
532+
}
533+
}
534+
525535
/**
526536
* CoNLL dep parsing - coarser POS tags.
527537
*/

src/edu/stanford/nlp/pipeline/CoNLLUReader.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ public class CoNLLUReader {
2424
* field constants
2525
**/
2626
// TODO: read sent_id?
27-
// TODO: read comments in general
2827
// TODO: reconsider the newline as the after on the last word
2928
// TODO: keep around the rest of the misc annotations
3029
public static final int CoNLLU_IndexField = 0;
@@ -223,6 +222,8 @@ public class CoNLLUSentence {
223222
public List<String> emptyLines = new ArrayList<>();
224223
// data for the sentence contained in # key values
225224
public HashMap<String, String> sentenceData = new HashMap<>();
225+
// all of the comments, including the ones that showed up in sentenceData
226+
public List<String> comments = new ArrayList<>();
226227
// map indices in token list to mwt data if there is any
227228
HashMap<Integer, Integer> mwtData = new HashMap<>();
228229
// mwt tokens
@@ -259,6 +260,7 @@ public void addSentenceData(String sentenceDataLine) {
259260
String value = sentenceDataLine.substring(sentenceDataLine.indexOf('='));
260261
sentenceData.put(key, value);
261262
}
263+
comments.add(sentenceDataLine);
262264
}
263265

264266
/**
@@ -600,6 +602,7 @@ public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence
600602
sentenceCoreMap.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, enhancedParse);
601603
}
602604

605+
sentenceCoreMap.set(CoreAnnotations.CommentsAnnotation.class, sentence.comments);
603606
return sentenceCoreMap;
604607
}
605608

src/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -580,8 +580,11 @@ public static void main(String[] args) throws IOException {
580580
} else if (outputFormat == OutputFormat.CONLLU) {
581581
CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter();
582582
String semgrexName = semgrex.toString().trim();
583-
// TODO: comments should load from the CoNLLU document, if applicable
584-
List<String> comments = new ArrayList<>(graph.getComments());
583+
List<String> comments = new ArrayList<>(sentence.get(CoreAnnotations.CommentsAnnotation.class));
584+
// TODO: maybe stop putting comments on the graphs?
585+
if (comments.size() == 0) {
586+
comments.addAll(graph.getComments());
587+
}
585588
boolean found = true;
586589
while (found) {
587590
StringBuilder comment = new StringBuilder();

0 commit comments

Comments
 (0)