Skip to content

Commit c3d2dec

Browse files
committed
Update whitespace
1 parent b6ba831 commit c3d2dec

File tree

1 file changed

+161
-171
lines changed

1 file changed

+161
-171
lines changed

src/edu/stanford/nlp/trees/ud/CoNLLUDocumentWriter.java

Lines changed: 161 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -18,148 +18,140 @@
1818
*/
1919
public class CoNLLUDocumentWriter {
2020

21+
private static final String LRB_PATTERN = "(?i)-LRB-";
22+
private static final String RRB_PATTERN = "(?i)-RRB-";
2123

22-
private static final String LRB_PATTERN = "(?i)-LRB-";
23-
private static final String RRB_PATTERN = "(?i)-RRB-";
2424

25+
public String printSemanticGraph(SemanticGraph basicSg) {
26+
return printSemanticGraph(basicSg, null, true);
27+
}
2528

26-
public String printSemanticGraph(SemanticGraph basicSg) {
27-
return printSemanticGraph(basicSg, null, true);
28-
}
29+
public String printSemanticGraph(SemanticGraph basicSg, SemanticGraph enhancedSg) {
30+
return printSemanticGraph(basicSg, enhancedSg, true);
31+
}
2932

30-
public String printSemanticGraph(SemanticGraph basicSg, SemanticGraph enhancedSg) {
31-
return printSemanticGraph(basicSg, enhancedSg, true);
32-
}
33+
public String printSemanticGraph(SemanticGraph basicSg, SemanticGraph enhancedSg, boolean unescapeParenthesis) {
34+
StringBuilder sb = new StringBuilder();
3335

34-
public String printSemanticGraph(SemanticGraph basicSg, SemanticGraph enhancedSg, boolean unescapeParenthesis) {
36+
/* Print comments. */
37+
for (String comment : basicSg.getComments()) {
38+
sb.append(comment).append(System.lineSeparator());
39+
}
3540

41+
SemanticGraph tokenSg = enhancedSg != null ? enhancedSg : basicSg;
3642

43+
for (IndexedWord token : tokenSg.vertexListSorted()) {
44+
/* Check for multiword tokens. */
45+
if (token.containsKey(CoreAnnotations.CoNLLUTokenSpanAnnotation.class)) {
46+
printSpan(sb, token);
47+
} else if (token.containsKey(CoreAnnotations.IsFirstWordOfMWTAnnotation.class) && token.get(CoreAnnotations.IsFirstWordOfMWTAnnotation.class)) {
48+
printMWT(sb, tokenSg, token);
49+
}
3750

38-
StringBuilder sb = new StringBuilder();
51+
/* Try to find main governor and additional dependencies. */
52+
IndexedWord gov = basicSg.containsVertex(token) ? basicSg.getParent(token) : null;
53+
String govIdx = gov != null ? gov.toCopyIndex() : null;
54+
GrammaticalRelation reln = gov != null ? basicSg.getEdge(gov, token).getRelation() : null;
55+
56+
HashMap<String, String> enhancedDependencies = new HashMap<>();
57+
if (enhancedSg != null) {
58+
for (IndexedWord parent : enhancedSg.getParents(token)) {
59+
SemanticGraphEdge edge = enhancedSg.getEdge(parent, token);
60+
String relationString = edge.getRelation().toString();
61+
// for Joakim
62+
//if (edge.getWeight() == 1.0) {
63+
// relationString = relationString + ":ENH_CONTROL";
64+
//} else if (edge.getWeight() == 3.0) {
65+
// relationString = relationString + ":ENH_RELCL";
66+
//} else if (edge.getWeight() == 4.0) {
67+
// relationString = relationString + ":ENH_GAPPING";
68+
//} else if (edge.getWeight() == 5.0) {
69+
// relationString = relationString + ":ENH_CONJ_PROP";
70+
//}
71+
enhancedDependencies.put(parent.toCopyIndex(), relationString);
72+
}
73+
} else {
74+
// add enhanced ones stored with token
75+
HashMap<String, String> secondaryDeps = token.get(CoreAnnotations.CoNLLUSecondaryDepsAnnotation.class);
76+
if (secondaryDeps != null) {
77+
enhancedDependencies.putAll(token.get(CoreAnnotations.CoNLLUSecondaryDepsAnnotation.class));
78+
//add basic dependency
79+
if (gov != null) {
80+
enhancedDependencies.put(govIdx, reln.toString());
81+
}
82+
}
83+
}
3984

40-
/* Print comments. */
41-
for (String comment : basicSg.getComments()) {
42-
sb.append(comment).append(System.lineSeparator());
85+
String additionalDepsString = CoNLLUUtils.toExtraDepsString(enhancedDependencies);
86+
String word = token.word();
87+
String featuresString = CoNLLUFeatures.toFeatureString(token.get(CoreAnnotations.CoNLLUFeats.class));
88+
String pos = token.getString(CoreAnnotations.PartOfSpeechAnnotation.class, "_");
89+
String upos = token.getString(CoreAnnotations.CoarseTagAnnotation.class, "_");
90+
String misc = token.getString(CoreAnnotations.CoNLLUMisc.class, "_");
91+
String lemma = token.getString(CoreAnnotations.LemmaAnnotation.class, "_");
92+
String relnName = reln == null ? "_" : reln.toString();
93+
94+
// don't use after() directly; it returns a default of ""
95+
// TODO: does this handle SpaceAfter on other tokens or SpacesAfter?
96+
if (token.get(CoreAnnotations.AfterAnnotation.class) != null && token.after().equals("")) {
97+
IndexedWord nextVertex = tokenSg.getNodeByIndexSafe(token.index() + 1);
98+
// the next word needs to exist and be part of the same MWT
99+
// and either this word is the start of the MWT
100+
// or this word is the middle of the same MWT as the next word
101+
// if that is true, we will skip the SpaceAfter annotation
102+
boolean inMWT = ((nextVertex != null && isMWTbutNotStart(nextVertex)) &&
103+
((token.containsKey(CoreAnnotations.IsFirstWordOfMWTAnnotation.class) && token.get(CoreAnnotations.IsFirstWordOfMWTAnnotation.class)) ||
104+
(isMWTbutNotStart(token))));
105+
if (!inMWT) {
106+
if (misc.equals("_")) {
107+
misc = "SpaceAfter=No";
108+
} else {
109+
misc = misc + "|SpaceAfter=No";
110+
}
43111
}
112+
}
44113

45-
SemanticGraph tokenSg = enhancedSg != null ? enhancedSg : basicSg;
46-
47-
for (IndexedWord token : tokenSg.vertexListSorted()) {
48-
/* Check for multiword tokens. */
49-
if (token.containsKey(CoreAnnotations.CoNLLUTokenSpanAnnotation.class)) {
50-
printSpan(sb, token);
51-
} else if (token.containsKey(CoreAnnotations.IsFirstWordOfMWTAnnotation.class) && token.get(CoreAnnotations.IsFirstWordOfMWTAnnotation.class)) {
52-
printMWT(sb, tokenSg, token);
53-
}
54-
55-
/* Try to find main governor and additional dependencies. */
56-
IndexedWord gov = basicSg.containsVertex(token) ? basicSg.getParent(token) : null;
57-
String govIdx = gov != null ? gov.toCopyIndex() : null;
58-
GrammaticalRelation reln = gov != null ? basicSg.getEdge(gov, token).getRelation() : null;
59-
60-
HashMap<String, String> enhancedDependencies = new HashMap<>();
61-
if (enhancedSg != null) {
62-
63-
for (IndexedWord parent : enhancedSg.getParents(token)) {
64-
SemanticGraphEdge edge = enhancedSg.getEdge(parent, token);
65-
String relationString = edge.getRelation().toString();
66-
// for Joakim
67-
//if (edge.getWeight() == 1.0) {
68-
// relationString = relationString + ":ENH_CONTROL";
69-
//} else if (edge.getWeight() == 3.0) {
70-
// relationString = relationString + ":ENH_RELCL";
71-
//} else if (edge.getWeight() == 4.0) {
72-
// relationString = relationString + ":ENH_GAPPING";
73-
//} else if (edge.getWeight() == 5.0) {
74-
// relationString = relationString + ":ENH_CONJ_PROP";
75-
//}
76-
enhancedDependencies.put(parent.toCopyIndex(), relationString);
77-
}
78-
79-
} else {
80-
81-
// add enhanced ones stored with token
82-
HashMap<String, String> secondaryDeps = token.get(CoreAnnotations.CoNLLUSecondaryDepsAnnotation.class);
83-
if (secondaryDeps != null) {
84-
enhancedDependencies.putAll(token.get(CoreAnnotations.CoNLLUSecondaryDepsAnnotation.class));
85-
//add basic dependency
86-
if (gov != null) {
87-
enhancedDependencies.put(govIdx, reln.toString());
88-
}
89-
}
90-
}
91-
92-
93-
String additionalDepsString = CoNLLUUtils.toExtraDepsString(enhancedDependencies);
94-
String word = token.word();
95-
String featuresString = CoNLLUFeatures.toFeatureString(token.get(CoreAnnotations.CoNLLUFeats.class));
96-
String pos = token.getString(CoreAnnotations.PartOfSpeechAnnotation.class, "_");
97-
String upos = token.getString(CoreAnnotations.CoarseTagAnnotation.class, "_");
98-
String misc = token.getString(CoreAnnotations.CoNLLUMisc.class, "_");
99-
String lemma = token.getString(CoreAnnotations.LemmaAnnotation.class, "_");
100-
String relnName = reln == null ? "_" : reln.toString();
101-
102-
// don't use after() directly; it returns a default of ""
103-
// TODO: does this handle SpaceAfter on other tokens or SpacesAfter?
104-
if (token.get(CoreAnnotations.AfterAnnotation.class) != null && token.after().equals("")) {
105-
IndexedWord nextVertex = tokenSg.getNodeByIndexSafe(token.index() + 1);
106-
// the next word needs to exist and be part of the same MWT
107-
// and either this word is the start of the MWT
108-
// or this word is the middle of the same MWT as the next word
109-
// if that is true, we will skip the SpaceAfter annotation
110-
boolean inMWT = ((nextVertex != null && isMWTbutNotStart(nextVertex)) &&
111-
((token.containsKey(CoreAnnotations.IsFirstWordOfMWTAnnotation.class) && token.get(CoreAnnotations.IsFirstWordOfMWTAnnotation.class)) ||
112-
(isMWTbutNotStart(token))));
113-
if (!inMWT) {
114-
if (misc.equals("_")) {
115-
misc = "SpaceAfter=No";
116-
} else {
117-
misc = misc + "|SpaceAfter=No";
118-
}
119-
}
120-
}
121-
122-
/* Root. */
123-
if (govIdx == null && basicSg.getRoots().contains(token)) {
124-
govIdx = "0";
125-
relnName = GrammaticalRelation.ROOT.toString();
126-
} else if (govIdx == null) {
127-
govIdx = "_";
128-
relnName = "_";
129-
}
130-
131-
if (enhancedSg != null && enhancedSg.getRoots().contains(token)) {
132-
if (enhancedDependencies.isEmpty()) {
133-
additionalDepsString = "0:root";
134-
} else {
135-
additionalDepsString = "0:root|" + additionalDepsString;
136-
}
137-
}
138-
139-
if (unescapeParenthesis) {
140-
word = word.replaceAll(LRB_PATTERN, "(");
141-
word = word.replaceAll(RRB_PATTERN, ")");
142-
lemma = lemma.replaceAll(LRB_PATTERN, "(");
143-
lemma = lemma.replaceAll(RRB_PATTERN, ")");
144-
}
145-
146-
sb.append(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n", token.toCopyIndex(), word,
147-
lemma, upos, pos, featuresString, govIdx, relnName, additionalDepsString, misc));
114+
/* Root. */
115+
if (govIdx == null && basicSg.getRoots().contains(token)) {
116+
govIdx = "0";
117+
relnName = GrammaticalRelation.ROOT.toString();
118+
} else if (govIdx == null) {
119+
govIdx = "_";
120+
relnName = "_";
121+
}
122+
123+
if (enhancedSg != null && enhancedSg.getRoots().contains(token)) {
124+
if (enhancedDependencies.isEmpty()) {
125+
additionalDepsString = "0:root";
126+
} else {
127+
additionalDepsString = "0:root|" + additionalDepsString;
148128
}
149-
sb.append(System.lineSeparator());
129+
}
150130

151-
return sb.toString();
131+
if (unescapeParenthesis) {
132+
word = word.replaceAll(LRB_PATTERN, "(");
133+
word = word.replaceAll(RRB_PATTERN, ")");
134+
lemma = lemma.replaceAll(LRB_PATTERN, "(");
135+
lemma = lemma.replaceAll(RRB_PATTERN, ")");
136+
}
137+
138+
sb.append(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n", token.toCopyIndex(), word,
139+
lemma, upos, pos, featuresString, govIdx, relnName, additionalDepsString, misc));
152140
}
141+
sb.append(System.lineSeparator());
142+
143+
return sb.toString();
144+
}
153145

154146
/**
155147
* Outputs just one token span (MWT)
156148
*/
157149
public static void printSpan(StringBuilder sb, AbstractCoreLabel token) {
158-
IntPair tokenSpan = token.get(CoreAnnotations.CoNLLUTokenSpanAnnotation.class);
159-
if (tokenSpan.getSource() == token.index()) {
160-
String range = String.format("%d-%d", tokenSpan.getSource(), tokenSpan.getTarget());
161-
sb.append(String.format("%s\t%s\t_\t_\t_\t_\t_\t_\t_\t_%n", range, token.originalText()));
162-
}
150+
IntPair tokenSpan = token.get(CoreAnnotations.CoNLLUTokenSpanAnnotation.class);
151+
if (tokenSpan.getSource() == token.index()) {
152+
String range = String.format("%d-%d", tokenSpan.getSource(), tokenSpan.getTarget());
153+
sb.append(String.format("%s\t%s\t_\t_\t_\t_\t_\t_\t_\t_%n", range, token.originalText()));
154+
}
163155
}
164156

165157
/**
@@ -178,22 +170,22 @@ public static boolean isMWTbutNotStart(IndexedWord nextVertex) {
178170
}
179171

180172
public static void printMWT(StringBuilder sb, SemanticGraph graph, IndexedWord token) {
181-
int startIndex = token.index();
182-
int endIndex = startIndex;
183-
// advance endIndex until we reach the end of the sentence, the start of the next MWT,
184-
// or a word which isn't part of any MWT
185-
IndexedWord nextVertex;
186-
while ((nextVertex = graph.getNodeByIndexSafe(endIndex+1)) != null) {
187-
if (!isMWTbutNotStart(nextVertex)) {
188-
break;
189-
}
190-
++endIndex;
173+
int startIndex = token.index();
174+
int endIndex = startIndex;
175+
// advance endIndex until we reach the end of the sentence, the start of the next MWT,
176+
// or a word which isn't part of any MWT
177+
IndexedWord nextVertex;
178+
while ((nextVertex = graph.getNodeByIndexSafe(endIndex+1)) != null) {
179+
if (!isMWTbutNotStart(nextVertex)) {
180+
break;
191181
}
192-
if (startIndex == endIndex) {
193-
return;
194-
}
195-
String range = String.format("%d-%d", startIndex, endIndex);
196-
sb.append(String.format("%s\t%s\t_\t_\t_\t_\t_\t_\t_\t_%n", range, token.get(CoreAnnotations.MWTTokenTextAnnotation.class)));
182+
++endIndex;
183+
}
184+
if (startIndex == endIndex) {
185+
return;
186+
}
187+
String range = String.format("%d-%d", startIndex, endIndex);
188+
sb.append(String.format("%s\t%s\t_\t_\t_\t_\t_\t_\t_\t_%n", range, token.get(CoreAnnotations.MWTTokenTextAnnotation.class)));
197189
}
198190

199191
/**
@@ -205,42 +197,40 @@ public static void printMWT(StringBuilder sb, SemanticGraph graph, IndexedWord t
205197
*/
206198

207199
public String printPOSAnnotations(CoreMap sentence, boolean fakeDeps) {
208-
StringBuilder sb = new StringBuilder();
209-
210-
int index = 0;
211-
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
212-
/* Check for multiword tokens. */
213-
if (token.containsKey(CoreAnnotations.CoNLLUTokenSpanAnnotation.class)) {
214-
printSpan(sb, token);
215-
}
200+
StringBuilder sb = new StringBuilder();
216201

217-
String upos = token.getString(CoreAnnotations.CoarseTagAnnotation.class, "_");
218-
String lemma = token.getString(CoreAnnotations.LemmaAnnotation.class, "_");
219-
String pos = token.getString(CoreAnnotations.PartOfSpeechAnnotation.class, "_");
220-
String featuresString = CoNLLUFeatures.toFeatureString(token.get(CoreAnnotations.CoNLLUFeats.class));
221-
String misc = token.getString(CoreAnnotations.CoNLLUMisc.class, "_");
222-
final String head;
223-
final String rel;
224-
final String headrel;
225-
if (fakeDeps) {
226-
// deps count from 1, with 0 as the root.
227-
// we will have the first word go to fake root
228-
head = Integer.toString(index);
229-
rel = (index == 0) ? "root" : "dep";
230-
headrel = head + ":" + rel;
231-
} else {
232-
head = "_";
233-
rel = "_";
234-
headrel = "_";
235-
}
236-
index++;
237-
sb.append(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n", token.index(), token.word(),
238-
lemma, upos , pos, featuresString, head, rel, headrel, misc));
202+
int index = 0;
203+
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
204+
/* Check for multiword tokens. */
205+
if (token.containsKey(CoreAnnotations.CoNLLUTokenSpanAnnotation.class)) {
206+
printSpan(sb, token);
239207
}
240-
sb.append(System.lineSeparator());
241-
242-
return sb.toString();
243208

209+
String upos = token.getString(CoreAnnotations.CoarseTagAnnotation.class, "_");
210+
String lemma = token.getString(CoreAnnotations.LemmaAnnotation.class, "_");
211+
String pos = token.getString(CoreAnnotations.PartOfSpeechAnnotation.class, "_");
212+
String featuresString = CoNLLUFeatures.toFeatureString(token.get(CoreAnnotations.CoNLLUFeats.class));
213+
String misc = token.getString(CoreAnnotations.CoNLLUMisc.class, "_");
214+
final String head;
215+
final String rel;
216+
final String headrel;
217+
if (fakeDeps) {
218+
// deps count from 1, with 0 as the root.
219+
// we will have the first word go to fake root
220+
head = Integer.toString(index);
221+
rel = (index == 0) ? "root" : "dep";
222+
headrel = head + ":" + rel;
223+
} else {
224+
head = "_";
225+
rel = "_";
226+
headrel = "_";
227+
}
228+
index++;
229+
sb.append(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n", token.index(), token.word(),
230+
lemma, upos , pos, featuresString, head, rel, headrel, misc));
244231
}
232+
sb.append(System.lineSeparator());
245233

234+
return sb.toString();
235+
}
246236
}

0 commit comments

Comments
 (0)