Skip to content

Commit 7b095f4

Browse files
committed
When processing "not only" and similar phrases into UD, separate them from the CONJP (sometimes ADVP by error) that they show up in. This allows the later part of the converter to connect both of them to the parent with advmod.
As part of this, turn the UPOS of "not" into PART. Also, update the corrector to make a few changes to the structure, which may help usages of the trees or of SD as well as the UD. The UD changes are written to accommodate the structural errors in the original PTB, though.
1 parent 2945cac commit 7b095f4

File tree

4 files changed

+18
-1
lines changed

4 files changed

+18
-1
lines changed

src/edu/stanford/nlp/trees/CoordinationTransformer.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,10 @@ private static Tree findCCparent(Tree t, Tree root) {
728728
private static final TregexPattern BUT_ALSO_PATTERN = TregexPattern.compile("CONJP=conjp < (CC=cc < but) < (RB=rb < also) ?$+ (__=nextNode < (__ < __))");
729729
private static final TsurgeonPattern BUT_ALSO_OPERATION = Tsurgeon.parseOperation("[move cc $- conjp] [move rb $- cc] [if exists nextNode move rb >1 nextNode] [createSubtree ADVP rb] [delete conjp]");
730730

731+
/* "not only" is not a MWE, so break up the CONJP similar to "but also". */
732+
// Matches a CONJP (or an ADVP, which the treebank sometimes uses by error) that has an RB child
// "not" and an RB child only/just/merely/even, optionally immediately followed by a sister
// (nextNode) that has at least one child with a child of its own.
// The alternation is wrapped in (?i:...) so that ^ and $ anchor every alternative; ungrouped,
// ^ bound only to "only" and $ only to "even", so find-style matching also accepted words
// such as "justly".
private static final TregexPattern NOT_ONLY_PATTERN = TregexPattern.compile("CONJP|ADVP=conjp < (RB=not < /^(?i)not$/) < (RB=only < /^(?i:only|just|merely|even)$/) ?$+ (__=nextNode < (__ < __))");
733+
// Tsurgeon script applied to NOT_ONLY_PATTERN matches, in order:
//   1. move "not" to be the right sister of conjp, then "only" to be the right sister of "not";
//   2. if nextNode was matched, move "only" and then "not" to be its first child, so "not"
//      ends up ahead of "only" inside nextNode;
//   3. wrap "not" and "only" each in an ADVP of its own;
//   4. delete the original conjp node.
private static final TsurgeonPattern NOT_ONLY_OPERATION = Tsurgeon.parseOperation("[move not $- conjp] [move only $- not] [if exists nextNode move only >1 nextNode] [if exists nextNode move not >1 nextNode] [createSubtree ADVP not] [createSubtree ADVP only] [delete conjp]");
734+
731735
/* at least / at most / at best / at worst / ... should be treated as if "at"
732736
was a preposition and the RBS was a noun. Assumes that the MWE "at least"
733737
has already been extracted. */
@@ -749,6 +753,7 @@ public static Tree MWETransform(Tree t) {
749753

750754
Tsurgeon.processPattern(ACCORDING_TO_PATTERN, ACCORDING_TO_OPERATION, t);
751755
Tsurgeon.processPattern(BUT_ALSO_PATTERN, BUT_ALSO_OPERATION, t);
756+
Tsurgeon.processPattern(NOT_ONLY_PATTERN, NOT_ONLY_OPERATION, t);
752757
Tsurgeon.processPattern(AT_RBS_PATTERN, AT_RBS_OPERATION, t);
753758
Tsurgeon.processPattern(AT_ALL_PATTERN, AT_ALL_OPERATION, t);
754759

src/edu/stanford/nlp/trees/UniversalEnglishGrammaticalRelations.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1312,7 +1312,6 @@ private UniversalEnglishGrammaticalRelations() {}
13121312
MODIFIER,
13131313
"S|VP|ADJP|PP|ADVP|UCP(?:-TMP|-ADV)?|NX|NML|SBAR|NP(?:-TMP|-ADV)?", tregexCompiler,
13141314
"NP|NP-TMP|NP-ADV|NX|NML < (PDT|CC|DT=target < /^(?i:either|neither|both)$/ $++ CC)",
1315-
"NP|NP-TMP|NP-ADV|NX|NML < (CONJP=target < (RB < /^(?i:not)$/) < (RB|JJ < /^(?i:only|merely|just)$/) $++ CC|CONJP)",
13161315
// This matches weird/wrong NP-internal preconjuncts where you get (NP PDT (NP NP CC NP)) or similar
13171316
"NP|NP-TMP|NP-ADV|NX|NML < (PDT|CC|DT=target < /^(?i:either|neither|both)$/ ) < (NP < CC)",
13181317
"/^S|VP|ADJP|PP|ADVP|UCP(?:-TMP|-ADV)?|NX|NML|SBAR$/ < (PDT|DT|CC=target < /^(?i:either|neither|both)$/ $++ CC)",

src/edu/stanford/nlp/trees/UniversalPOSMapper.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ public static void load() {
134134
// RB -> PART when it is verbal negation (not or its reductions)
135135
{ "@VP|SINV|SQ|FRAG|ADVP < (RB=target < /^(?i:not|n't|nt|t|n)$/)", "PART" },
136136

137+
// "not" as part of a phrase such as "not only", "not just", etc is tagged as PART in UD
138+
{ "@ADVP|CONJP <1 (RB=target < /^(?i:not|n't|nt|t|n)$/) <2 (__ < only|just|merely|even) !<3 __", "PART" },
139+
137140
// Otherwise RB -> ADV
138141
{ "RB=target <... {/.*/}", "ADV" },
139142

src/edu/stanford/nlp/trees/treebank/EnglishPTBTreebankCorrector.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,16 @@ private static BufferedReader getBufferedReader(String source) {
168168
"adjoin (NP NN@) newnp\n" +
169169
'\n') +
170170

171+
// Fix not_RB only_JJ, which should generally be not_RB only_RB
172+
// and put it under a CONJP instead of an ADVP
173+
("ADVP|CONJP <1 (__ < /^(?i:not)$/) <2 (JJ=bad < only|just|merely|even) !<3 __\n" +
174+
"relabel bad RB\n" +
175+
'\n') +
176+
177+
("ADVP=bad <1 (__ < /^(?i:not)$/) <2 (RB < only|just|merely|even) !<3 __\n" +
178+
"relabel bad CONJP\n" +
179+
'\n') +
180+
171181
// Fix some cases of 'as well as' not made into a CONJP unit
172182
// There are a few other weird cases that should also be reviewed with the tregex
173183
// well|Well|WELL , as|AS|As . as|AS|As !>(__ > @CONJP)

0 commit comments

Comments
 (0)