@@ -35,6 +35,32 @@ public String printSemanticGraph(SemanticGraph basicSg, SemanticGraph enhancedSg
35
35
return printSemanticGraph (basicSg , enhancedSg , true , basicSg .getComments ());
36
36
}
37
37
38
+ // TODO: put in the same place as CoNLLUReader::unescapeSpacesAfter
39
+ public static String escapeSpaces (String after ) {
40
+ StringBuilder result = new StringBuilder ();
41
+ for (int i = 0 ; i < after .length (); ++i ) {
42
+ char next = after .charAt (i );
43
+ if (next == ' ' ) {
44
+ result .append ("\\ s" );
45
+ } else if (next == '\t' ) {
46
+ result .append ("\\ t" );
47
+ } else if (next == '\r' ) {
48
+ result .append ("\\ r" );
49
+ } else if (next == '\n' ) {
50
+ result .append ("\\ n" );
51
+ } else if (next == '|' ) {
52
+ result .append ("\\ p" );
53
+ } else if (next == '\\' ) {
54
+ result .append ("\\ \\ " );
55
+ } else if (next == ' ' ) {
56
+ result .append ("\\ u00A0" );
57
+ } else {
58
+ result .append (next );
59
+ }
60
+ }
61
+ return result .toString ();
62
+ }
63
+
38
64
public String printSemanticGraph (SemanticGraph basicSg , SemanticGraph enhancedSg , boolean unescapeParenthesis , Collection <String > comments ) {
39
65
StringBuilder sb = new StringBuilder ();
40
66
@@ -97,21 +123,29 @@ public String printSemanticGraph(SemanticGraph basicSg, SemanticGraph enhancedSg
97
123
String relnName = reln == null ? "_" : reln .toString ();
98
124
99
125
// don't use after() directly; it returns a default of ""
100
- // TODO: does this handle SpaceAfter on other tokens or SpacesAfter?
101
- if (token .get (CoreAnnotations .AfterAnnotation .class ) != null && token .after ().equals ("" )) {
102
- IndexedWord nextVertex = tokenSg .getNodeByIndexSafe (token .index () + 1 );
103
- // the next word needs to exist and be part of the same MWT
104
- // and either this word is the start of the MWT
105
- // or this word is the middle of the same MWT as the next word
106
- // if that is true, we will skip the SpaceAfter annotation
107
- boolean inMWT = ((nextVertex != null && isMWTbutNotStart (nextVertex )) &&
108
- ((token .containsKey (CoreAnnotations .IsFirstWordOfMWTAnnotation .class ) && token .get (CoreAnnotations .IsFirstWordOfMWTAnnotation .class )) ||
109
- (isMWTbutNotStart (token ))));
110
- if (!inMWT ) {
111
- if (misc .equals ("_" )) {
112
- misc = "SpaceAfter=No" ;
126
+ // TODO: also print SpacesBefore on the first token
127
+ if (token .get (CoreAnnotations .AfterAnnotation .class ) != null ) {
128
+ String after = token .after ();
129
+ if (!after .equals (" " )) {
130
+ if (after .equals ("" )) {
131
+ after = "SpaceAfter=No" ;
113
132
} else {
114
- misc = misc + "|SpaceAfter=No" ;
133
+ after = "SpacesAfter=" + escapeSpaces (after );
134
+ }
135
+ IndexedWord nextVertex = tokenSg .getNodeByIndexSafe (token .index () + 1 );
136
+ // the next word needs to exist and be part of the same MWT
137
+ // and either this word is the start of the MWT
138
+ // or this word is the middle of the same MWT as the next word
139
+ // if that is true, we will skip the SpaceAfter annotation
140
+ boolean inMWT = ((nextVertex != null && isMWTbutNotStart (nextVertex )) &&
141
+ ((token .containsKey (CoreAnnotations .IsFirstWordOfMWTAnnotation .class ) && token .get (CoreAnnotations .IsFirstWordOfMWTAnnotation .class )) ||
142
+ (isMWTbutNotStart (token ))));
143
+ if (!inMWT ) {
144
+ if (misc .equals ("_" )) {
145
+ misc = after ;
146
+ } else {
147
+ misc = misc + "|" + after ;
148
+ }
115
149
}
116
150
}
117
151
}
0 commit comments