diff --git a/src/edu/stanford/nlp/graph/DirectedMultiGraph.java b/src/edu/stanford/nlp/graph/DirectedMultiGraph.java index 26639d8f50..52b3171c27 100644 --- a/src/edu/stanford/nlp/graph/DirectedMultiGraph.java +++ b/src/edu/stanford/nlp/graph/DirectedMultiGraph.java @@ -666,17 +666,30 @@ public String toString() { StringBuilder s = new StringBuilder(); s.append("{\n"); s.append("Vertices:\n"); + + List<String> lines = new ArrayList<>(); for (V vertex : outgoingEdges.keySet()) { - s.append(" ").append(vertex).append('\n'); + lines.add(" " + vertex + '\n'); + } + Collections.sort(lines); + for (String line : lines) { + s.append(line); } + s.append("Edges:\n"); + lines = new ArrayList<>(); for (V source : outgoingEdges.keySet()) { for (V dest : outgoingEdges.get(source).keySet()) { for (E edge : outgoingEdges.get(source).get(dest)) { - s.append(" ").append(source).append(" -> ").append(dest).append(" : ").append(edge).append('\n'); + lines.add(" " + source + " -> " + dest + " : " + edge + "\n"); } } } + Collections.sort(lines); + for (String line : lines) { + s.append(line); + } + s.append('}'); return s.toString(); } diff --git a/src/edu/stanford/nlp/trees/CompositeTreeTransformer.java b/src/edu/stanford/nlp/trees/CompositeTreeTransformer.java index 583fdca730..6a7b4dc219 100644 --- a/src/edu/stanford/nlp/trees/CompositeTreeTransformer.java +++ b/src/edu/stanford/nlp/trees/CompositeTreeTransformer.java @@ -2,6 +2,7 @@ import java.util.List; import java.util.ArrayList; +import java.util.Arrays; /** * A TreeTransformer that applies component TreeTransformers in order. @@ -21,6 +22,10 @@ public CompositeTreeTransformer(List tt) { transformers.addAll(tt); } + public CompositeTreeTransformer(TreeTransformer ... 
tt) { + transformers.addAll(Arrays.asList(tt)); + } + public void addTransformer(TreeTransformer tt) { transformers.add(tt); } diff --git a/src/edu/stanford/nlp/trees/CoordinationTransformer.java b/src/edu/stanford/nlp/trees/CoordinationTransformer.java index 50c2366140..6d5883ae94 100644 --- a/src/edu/stanford/nlp/trees/CoordinationTransformer.java +++ b/src/edu/stanford/nlp/trees/CoordinationTransformer.java @@ -71,6 +71,14 @@ public CoordinationTransformer(HeadFinder hf, boolean performMWETransformation) qp = new QPTreeTransformer(performMWETransformation); } + public void debugLine(String prefix, Tree t) { + if (t instanceof TreeGraphNode) { + log.info(prefix + ((TreeGraphNode) t).toOneLineString()); + } else { + log.info(prefix + t); + } + } + /** * Transforms t if it contains a coordination in a flat structure (CCtransform) * and transforms UCP (UCPtransform). @@ -81,19 +89,19 @@ public CoordinationTransformer(HeadFinder hf, boolean performMWETransformation) @Override public Tree transformTree(Tree t) { if (VERBOSE) { - log.info("Input to CoordinationTransformer: " + t); + debugLine("Input to CoordinationTransformer: ", t); } if (performMWETransformation) { t = gappingTransform(t); if (VERBOSE) { - log.info("After t = gappingTransform(t);\n: " + t); + debugLine("After t = gappingTransform(t);: ", t); } } t = tn.transformTree(t); if (VERBOSE) { - log.info("After DependencyTreeTransformer: " + t); + debugLine("After DependencyTreeTransformer: ", t); } if (t == null) { return t; @@ -102,61 +110,64 @@ public Tree transformTree(Tree t) { if (performMWETransformation) { t = MWETransform(t); if (VERBOSE) { - log.info("After MWETransform: " + t); + debugLine("After MWETransform: ", t); } t = MWFlatTransform(t); if (VERBOSE) { - log.info("After MWFlatTransform: " + t); + debugLine("After MWFlatTransform: ", t); } t = prepCCTransform(t); if (VERBOSE) { - log.info("After prepCCTransform: " + t); + debugLine("After prepCCTransform: ", t); } } t = UCPtransform(t); if 
(VERBOSE) { - log.info("After UCPTransformer: " + t); + debugLine("After UCPTransformer: ", t); } t = CCtransform(t); if (VERBOSE) { - log.info("After CCTransformer: " + t); + debugLine("After CCTransformer: ", t); } t = qp.transformTree(t); if (VERBOSE) { - log.info("After QPTreeTransformer: " + t); + debugLine("After QPTreeTransformer: ", t); } t = SQflatten(t); if (VERBOSE) { - log.info("After SQ flattening: " + t); + debugLine("After SQ flattening: ", t); } t = dates.transformTree(t); if (VERBOSE) { - log.info("After DateTreeTransformer: " + t); + debugLine("After DateTreeTransformer: ", t); } t = removeXOverX(t); if (VERBOSE) { - log.info("After removeXoverX: " + t); + debugLine("After removeXoverX: ", t); } t = combineConjp(t); if (VERBOSE) { - log.info("After combineConjp: " + t); + debugLine("After combineConjp: ", t); } t = moveRB(t); if (VERBOSE) { - log.info("After moveRB: " + t); + debugLine("After moveRB: ", t); } t = changeSbarToPP(t); if (VERBOSE) { - log.info("After changeSbarToPP: " + t); + debugLine("After changeSbarToPP: ", t); } t = rearrangeNowThat(t); if (VERBOSE) { - log.info("After rearrangeNowThat: " + t); + debugLine("After rearrangeNowThat: ", t); + } + t = mergeYodaVerbs(t); + if (VERBOSE) { + debugLine("After mergeYodaVerbs: ", t); } - return t; } @@ -174,6 +185,19 @@ private static Tree rearrangeNowThat(Tree t) { } + private static final TregexPattern mergeYodaVerbsTregex = + TregexPattern.compile("VP=home < VBN=vbn $+ (VP=willbe <... 
{(__=will < will|have|has) ; (VP < (__=be << be|been))})"); + + private static final TsurgeonPattern mergeYodaVerbsTsurgeon = + Tsurgeon.parseOperation("[createSubtree VP vbn] [move will >-1 home] [move be >-1 home] [prune willbe]"); + + private static Tree mergeYodaVerbs(Tree t) { + if (t == null) { + return t; + } + return Tsurgeon.processPattern(mergeYodaVerbsTregex, mergeYodaVerbsTsurgeon, t); + } + private static final TregexPattern changeSbarToPPTregex = TregexPattern.compile("NP < (NP $++ (SBAR=sbar < (IN < /^(?i:after|before|until|since|during)$/ $++ S)))"); @@ -704,6 +728,13 @@ private static Tree findCCparent(Tree t, Tree root) { private static final TregexPattern BUT_ALSO_PATTERN = TregexPattern.compile("CONJP=conjp < (CC=cc < but) < (RB=rb < also) ?$+ (__=nextNode < (__ < __))"); private static final TsurgeonPattern BUT_ALSO_OPERATION = Tsurgeon.parseOperation("[move cc $- conjp] [move rb $- cc] [if exists nextNode move rb >1 nextNode] [createSubtree ADVP rb] [delete conjp]"); + /* + * "not only" is not a MWE, so break up the CONJP similar to "but also". + * compensate for some JJ tagged "only" in this expression + */ + private static final TregexPattern NOT_ONLY_PATTERN = TregexPattern.compile("CONJP|ADVP=conjp < (RB=not < /^(?i)not$/) < (RB|JJ=only < /^(?i:only|just|merely|even)$/) ?$+ (__=nextNode < (__ < __))"); + private static final TsurgeonPattern NOT_ONLY_OPERATION = Tsurgeon.parseOperation("[move not $- conjp] [move only $- not] [if exists nextNode move only >1 nextNode] [if exists nextNode move not >1 nextNode] [createSubtree ADVP not] [createSubtree ADVP only] [delete conjp]"); + /* at least / at most / at best / at worst / ... should be treated as if "at" was a preposition and the RBS was a noun. Assumes that the MWE "at least" has already been extracted. 
*/ @@ -725,6 +756,7 @@ public static Tree MWETransform(Tree t) { Tsurgeon.processPattern(ACCORDING_TO_PATTERN, ACCORDING_TO_OPERATION, t); Tsurgeon.processPattern(BUT_ALSO_PATTERN, BUT_ALSO_OPERATION, t); + Tsurgeon.processPattern(NOT_ONLY_PATTERN, NOT_ONLY_OPERATION, t); Tsurgeon.processPattern(AT_RBS_PATTERN, AT_RBS_OPERATION, t); Tsurgeon.processPattern(AT_ALL_PATTERN, AT_ALL_OPERATION, t); diff --git a/src/edu/stanford/nlp/trees/EnglishPatterns.java b/src/edu/stanford/nlp/trees/EnglishPatterns.java index 2bba7be5ef..3487f98cb8 100644 --- a/src/edu/stanford/nlp/trees/EnglishPatterns.java +++ b/src/edu/stanford/nlp/trees/EnglishPatterns.java @@ -86,9 +86,11 @@ public class EnglishPatterns { /** A list of verbs which are verbs of speaking that easily take an S (as a complement or topicalized) * which is a direct speech ccomp. For example: "He concedes: ``This is a difficult market.''" + *
+ * TODO: maybe sign, as in ASL? sing ... wish? */ public static final String sayVerbRegex = - "/^(?i:say|says|said|saying|(?:add|boast|counsel|explain|inform|interject|recall|remark|respond|proclaim|report|claim|shout|whisper|yell)(?:s|ed|ing)?|(?:advis|announc|acknowledg|conced|conclud|decid|declar|observ|stat|not|inton)(?:e|es|ed|ing)|(?:confess)(?:es|ed|ing)?|(?:agree)(?:s|d|ing)?|reply|replied|replies|replying|admit|admits|admitted|admitting|hold|holds|holding|held|write|writes|writing|wrote|tell|tells|telling|told|quipped|quip|quips|quipping|think|thinks|thinking|thought)$/"; + "/^(?i:say|says|said|saying|(?:add|bellow|bleat|blubber|bluster|boast|boom|bray|call|chant|chirp|claim|complain|coo|counsel|croak|crow|drawl|explain|gasp|inform|interject|pray|proclaim|protest|purr|recall|remark|report|respond|scream|shout|shriek|sigh|sulk|whisper|whoop|yammer|yap|yell|yelp)(?:s|ed|ing)?|(?:advis|announc|acknowledg|cackl|chortl|chuckl|conced|conclud|decid|declar|dron|grip|grous|inton|not|observ|pledg|propos|stat|whin|whing)(?:e|es|ed|ing)|(?:bitch|confess|kibitz|kibbitz|screech)(?:es|ed|ing)?|(?:agree)(?:s|d|ing)?|(?:cr|repl)(?:y|ied|ies|ying)|admit|admits|admitted|admitting|hold|holds|holding|held|write|writes|writing|wrote|tell|tells|telling|told|quipped|quip|quips|quipping|signal|signals|signaled|signalled|signaling|signalling|think|thinks|thinking|thought)$/"; // TODO: is there some better pattern to look for? 
We do not have tag information at this point diff --git a/src/edu/stanford/nlp/trees/QPTreeTransformer.java b/src/edu/stanford/nlp/trees/QPTreeTransformer.java index a727b449eb..4885e19d4b 100644 --- a/src/edu/stanford/nlp/trees/QPTreeTransformer.java +++ b/src/edu/stanford/nlp/trees/QPTreeTransformer.java @@ -100,6 +100,29 @@ public Tree transformTree(Tree t) { private static final TsurgeonPattern splitMoneyTsurgeon = Tsurgeon.parseOperation("createSubtree QP left right"); + // This fixes a very rare subset of parses + // such as "(NP (QP just about all) the losses) ..." + // in fact, that's the only example in ptb3-revised + // because of previous MWE combinations, we may already get + // "(NP (QP at least a) day)" + // -> "(NP (QP (ADVP at least) a) day)" + // and therefore the flattenAdvmodTsurgeon will also find that parse + private static final TregexPattern groupADVPTregex = + TregexPattern.compile("NP < (QP <1 RB=first <2 RB=second <3 (DT !$+ __) $++ /^N/)"); + + private static final TsurgeonPattern groupADVPTsurgeon = + Tsurgeon.parseOperation("createSubtree ADVP first second"); + + // Remove QP in a structure such as + // (NP (QP nearly_RB all_DT) stuff_NN) + // so that the converter can attach both `nearly` and `all` to `stuff` + // not using a nummod, either, which is kind of annoying + private static final TregexPattern flattenAdvmodTregex = + TregexPattern.compile("NP < (QP=remove <1 ADVP|RB <2 (DT !$+ __) $++ /^N/)"); + + private static final TsurgeonPattern flattenAdvmodTsurgeon = + Tsurgeon.parseOperation("excise remove remove"); + /** * Transforms t if it contains one of the following QP structure: *