Skip to content

Commit 6fa8d4d

Browse files
committed
Refactor somewhat - move all the context-sensitive mappings to their own construction
1 parent c53fa2d commit 6fa8d4d

File tree

1 file changed

+71
-140
lines changed

1 file changed

+71
-140
lines changed

src/edu/stanford/nlp/trees/UniversalPOSMapper.java

Lines changed: 71 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import java.io.BufferedReader;
44
import java.io.IOException;
55
import java.io.StringReader;
6+
import java.util.ArrayList;
67
import java.util.List;
78

89
import edu.stanford.nlp.io.RuntimeIOException;
@@ -51,166 +52,95 @@ public class UniversalPOSMapper {
5152
private UniversalPOSMapper() {} // static methods
5253

5354
public static void load() {
54-
String newLine = System.lineSeparator();
55-
String rawPattern = String.join(newLine,
56-
// ------------------------------
57-
// Context-sensitive mappings
55+
operations = new ArrayList<>();
56+
// ------------------------------
57+
// Context-sensitive mappings
58+
// ------------------------------
5859

59-
// TO -> PART (in CONJP phrases)
60-
"@CONJP < TO=target < VB",
61-
"",
62-
"relabel target PART",
63-
"",
60+
// Each row is { Tregex pattern, replacement tag }. First row: TO -> PART (in CONJP phrases)
61+
String [][] contextMappings = new String [][] {
62+
{ "@CONJP < TO=target < VB", "PART", },
63+
{ "@VP < @VP < (/^TO$/=target <... {/.*/})", "PART", },
64+
{ "@VP <: (/^TO$/=target <... {/.*/})", "PART", },
65+
{ "TO=target <... {/.*/}", "ADP", }, // otherwise TO -> ADP
66+
// Don't do this, we are now treating these as copular constructions
67+
// VB.* -> AUX (for passives where main verb is part of an ADJP)
68+
// @VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase|get|got|getting|gets|gotten)$/ ) < (@ADJP [ < VBN|VBD | < (@VP|ADJP < VBN|VBD) < CC ] )
69+
// relabel target AUX
6470

65-
// TO -> PART
66-
"@VP < @VP < (/^TO$/=target <... {/.*/})",
67-
"",
68-
"relabel target PART",
69-
"",
71+
// VB.* -> AUX (for cases with fronted main VPs)
72+
{ "@SINV < (@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ ) $-- (@VP < VBD|VBN))",
73+
"AUX", },
74+
// VB.* -> AUX (another, rarer case of fronted VPs)
75+
{ "@SINV < (@VP < (@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ )) $-- (@VP < VBD|VBN))",
76+
"AUX", },
7077

71-
// TO -> PART
72-
"@VP <: (/^TO$/=target <... {/.*/})",
73-
"",
74-
"relabel target PART",
75-
"",
78+
// VB.* -> AUX (passive, case 2)
79+
//"%SQ|SINV < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ $++ (VP < VBD|VBN))",
80+
//"%relabel target AUX",
81+
// VB.* -> AUX (active, case 1)
82+
{ "VP < VP < (/^VB.*$/=target <: /^(?i:will|have|can|would|do|is|was|be|are|has|could|should|did|been|may|were|had|'ll|'ve|does|am|might|ca|'m|being|'s|must|'d|'re|wo|shall|get|ve|s|got|r|m|getting|having|d|re|ll|wilt|v|of|my|nt|gets|du|wud|woud|with|willl|wil|wase|shoul|shal|`s|ould|-ll|most|made|hvae|hav|cold|as|art|ai|ar|a)$/)",
83+
"AUX", },
7684

77-
// TO -> ADP (otherwise)
78-
"TO=target <... {/.*/}",
79-
"",
80-
"relabel target ADP",
81-
"",
85+
// VB -> AUX (active, case 2)
86+
{ "@SQ|SINV < (/^VB/=target $++ /^(?:VP)/ <... {/.*/})", "AUX" },
8287

83-
// Don't do this, we are now treating these as copular constructions
84-
// VB.* -> AUX (for passives where main verb is part of an ADJP)
85-
// @VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase|get|got|getting|gets|gotten)$/ ) < (@ADJP [ < VBN|VBD | < (@VP|ADJP < VBN|VBD) < CC ] )
86-
//relabel target AUX",
88+
// otherwise, VB.* -> VERB
89+
{ "/^VB.*/=target <... {/.*/}", "VERB", },
8790

88-
// VB.* -> AUX (for cases with fronted main VPs)
89-
"@SINV < (@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ ) $-- (@VP < VBD|VBN))",
90-
"",
91-
"relabel target AUX",
92-
"",
91+
// IN -> SCONJ (subordinating conjunctions)
92+
{ "/^SBAR(-[^ ]+)?$/ < (IN=target $++ @S|FRAG|SBAR|SINV <... {/.*/})", "SCONJ", },
9393

94-
// VB.* -> AUX (another, rarer case of fronted VPs)
95-
"@SINV < (@VP < (@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ )) $-- (@VP < VBD|VBN))",
96-
"",
97-
"relabel target AUX",
98-
"",
99-
100-
// VB.* -> AUX (passive, case 2)
101-
"%SQ|SINV < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ $++ (VP < VBD|VBN))",
102-
"",
103-
"%relabel target AUX",
104-
"",
105-
106-
// VB.* -> AUX (active, case 1)
107-
"VP < VP < (/^VB.*$/=target <: /^(?i:will|have|can|would|do|is|was|be|are|has|could|should|did|been|may|were|had|'ll|'ve|does|am|might|ca|'m|being|'s|must|'d|'re|wo|shall|get|ve|s|got|r|m|getting|having|d|re|ll|wilt|v|of|my|nt|gets|du|wud|woud|with|willl|wil|wase|shoul|shal|`s|ould|-ll|most|made|hvae|hav|cold|as|art|ai|ar|a)$/)",
108-
"",
109-
"relabel target AUX",
110-
"",
111-
112-
// VB -> AUX (active, case 2)
113-
"@SQ|SINV < (/^VB/=target $++ /^(?:VP)/ <... {/.*/})",
114-
"",
115-
"relabel target AUX",
116-
"",
117-
118-
// VB.* -> VERB
119-
"/^VB.*/=target <... {/.*/}",
120-
"",
121-
"relabel target VERB",
122-
"",
123-
124-
// IN -> SCONJ (subordinating conjunctions)
125-
"/^SBAR(-[^ ]+)?$/ < (IN=target $++ @S|FRAG|SBAR|SINV <... {/.*/})",
126-
"",
127-
"relabel target SCONJ",
128-
"",
129-
130-
// IN -> SCONJ (subordinating conjunctions II)
131-
"@PP < (IN=target $+ @SBAR|S)",
132-
"",
133-
"relabel target SCONJ",
134-
"",
94+
// IN -> SCONJ (subordinating conjunctions II)
95+
{ "@PP < (IN=target $+ @SBAR|S)", "SCONJ" },
13596

136-
// IN -> ADP (otherwise)
137-
"IN=target < __",
138-
"",
139-
"relabel target ADP",
140-
"",
97+
// IN -> ADP (otherwise)
98+
{ "IN=target < __", "ADP" },
14199

142-
// NN -> SYM (in case of the percent sign)
143-
"NN=target <... {/\\\\%/}",
144-
"",
145-
"relabel target SYM",
146-
"",
100+
// NN -> SYM (in case of the percent sign)
101+
{ "NN=target <... {/[%]/}", "SYM" },
147102

148-
// fused det-noun pronouns -> PRON
149-
"NN=target < (/^(?i:(somebody|something|someone|anybody|anything|anyone|everybody|everything|everyone|nobody|nothing))$/)",
150-
"",
151-
"relabel target PRON",
152-
"",
103+
// fused det-noun pronouns -> PRON
104+
{ "NN=target < (/^(?i:(somebody|something|someone|anybody|anything|anyone|everybody|everything|everyone|nobody|nothing))$/)",
105+
"PRON" },
153106

154-
// NN -> NOUN (otherwise)
155-
"NN=target <... {/.*/}",
156-
"",
157-
"relabel target NOUN",
158-
"",
107+
// NN -> NOUN (otherwise)
108+
{ "NN=target <... {/.*/}", "NOUN" },
159109

160-
// NFP -> PUNCT (in case of possibly repeated hyphens, asterisks or tildes)
161-
"NFP=target <... {/^(~+|\\*+|\\-+)$/}",
162-
"",
163-
"relabel target PUNCT",
164-
"",
110+
// NFP -> PUNCT (in case of possibly repeated hyphens, asterisks or tildes)
111+
{ "NFP=target <... {/^(~+|\\*+|\\-+)$/}", "PUNCT", },
165112

166-
// NFP -> SYM (otherwise)
167-
"NFP=target <... {/.*/}",
168-
"",
169-
"relabel target SYM",
170-
"",
113+
// NFP -> SYM (otherwise)
114+
{ "NFP=target <... {/.*/}", "SYM" },
171115

172-
// RB -> PART when it is verbal negation (not or its reductions)
173-
"@VP|SINV|SQ|FRAG|ADVP < (RB=target < /^(?i:not|n't|nt|t|n)$/)",
174-
"",
175-
"relabel target PART",
176-
"",
116+
// RB -> PART when it is verbal negation (not or its reductions)
117+
{ "@VP|SINV|SQ|FRAG|ADVP < (RB=target < /^(?i:not|n't|nt|t|n)$/)", "PART" },
177118

178-
// Otherwise RB -> ADV
179-
"RB=target <... {/.*/}",
180-
"",
181-
"relabel target ADV",
182-
"",
119+
// Otherwise RB -> ADV
120+
{ "RB=target <... {/.*/}", "ADV" },
183121

184-
// DT -> PRON (pronominal this/that/these/those)
185-
"@NP <: (DT=target < /^(?i:th(is|at|ose|ese))$/)",
186-
"",
187-
"relabel target PRON",
188-
"",
122+
// DT -> PRON (pronominal this/that/these/those)
123+
{ "@NP <: (DT=target < /^(?i:th(is|at|ose|ese))$/)", "PRON", },
189124

190-
// DT -> DET
191-
"DT=target < __",
192-
"",
193-
"relabel target DET",
194-
"",
125+
// DT -> DET
126+
{ "DT=target < __", "DET" },
195127

196-
// WDT -> PRON (pronominal that/which)
197-
"@WHNP|NP <: (WDT=target < /^(?i:(that|which))$/)",
198-
"",
199-
"relabel target PRON",
200-
"",
128+
// WDT -> PRON (pronominal that/which)
129+
{ "@WHNP|NP <: (WDT=target < /^(?i:(that|which))$/)", "PRON" },
201130

202-
// WDT->SCONJ (incorrectly tagged subordinating conjunctions)
203-
"@SBAR < (WDT=target < /^(?i:(that|which))$/)",
204-
"",
205-
"relabel target SCONJ",
206-
"",
131+
// WDT->SCONJ (incorrectly tagged subordinating conjunctions)
132+
{ "@SBAR < (WDT=target < /^(?i:(that|which))$/)", "SCONJ" },
207133

208-
// WDT -> DET
209-
"WDT=target <... {/.*/}",
210-
"",
211-
"relabel target DET",
212-
"",
134+
// WDT -> DET
135+
{ "WDT=target <... {/.*/}", "DET" },
136+
};
137+
for (String[] newOp : contextMappings) {
138+
operations.add(new Pair<>(TregexPattern.compile(newOp[0]),
139+
Tsurgeon.parseOperation("relabel target " + newOp[1])));
213140

141+
}
142+
String newLine = System.lineSeparator();
143+
String rawPattern = String.join(newLine,
214144
// ------------------------------
215145
// 1 to 1 mappings
216146
// ------------------------------
@@ -436,7 +366,8 @@ public static void load() {
436366
"relabel target X");
437367
StringReader reader = new StringReader(rawPattern);
438368
try (BufferedReader buffered = new BufferedReader(reader)) {
439-
operations = Tsurgeon.getOperationsFromReader(buffered, new TregexPatternCompiler());
369+
List<Pair<TregexPattern, TsurgeonPattern>> newOperations = Tsurgeon.getOperationsFromReader(buffered, new TregexPatternCompiler());
370+
operations.addAll(newOperations);
440371
} catch (IOException e) {
441372
throw new RuntimeIOException(e);
442373
}

0 commit comments

Comments
 (0)