Skip to content

Commit fbca04d

Browse files
committed
Add the ability to remove just a single Morpho feature without having set it. Turns out, the one who needed that feature was me. Also, leave a note on a bomb that can happen if an operation is put over two lines
1 parent cabc020 commit fbca04d

File tree

2 files changed

+41
-4
lines changed

2 files changed

+41
-4
lines changed

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/EditNode.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,11 @@ public class EditNode extends SsurgeonEdit {
2525

2626
final String nodeName;
2727
final List<String> removedAttributes;
28+
final List<String> removedMorpho;
2829
final Map<String, String> attributes;
2930
final Map<String, String> updateMorphoFeatures;
3031

31-
public EditNode(String nodeName, Map<String, String> attributes, String updateMorphoFeatures, List<String> removedAttributes) {
32+
public EditNode(String nodeName, Map<String, String> attributes, String updateMorphoFeatures, List<String> removedAttributes, List<String> removedMorpho) {
3233
if (nodeName == null) {
3334
throw new SsurgeonParseException("Cannot make an EditNode with no nodeName");
3435
}
@@ -49,6 +50,7 @@ public EditNode(String nodeName, Map<String, String> attributes, String updateMo
4950
throw new SsurgeonParseException("Unknown attribute |" + attr + "| when building an EditNode operation");
5051
}
5152
}
53+
this.removedMorpho = new ArrayList<>(removedMorpho);
5254
}
5355

5456

@@ -77,6 +79,13 @@ public String toEditString() {
7779
buf.append(CoNLLUFeatures.toFeatureString(this.updateMorphoFeatures));
7880
}
7981

82+
for (String remove : removedMorpho) {
83+
buf.append(" ");
84+
buf.append(Ssurgeon.REMOVE_MORPHO_FEATURES);
85+
buf.append(" ");
86+
buf.append(remove);
87+
}
88+
8089
return buf.toString();
8190
}
8291

@@ -118,6 +127,17 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
118127
}
119128
}
120129

130+
for (String key : removedMorpho) {
131+
CoNLLUFeatures features = word.get(CoreAnnotations.CoNLLUFeats.class);
132+
if (features == null) {
133+
continue;
134+
}
135+
if (features.get(key) != null) {
136+
changed = true;
137+
features.remove(key);
138+
}
139+
}
140+
121141
for (String key : removedAttributes) {
122142
Class<? extends CoreAnnotation<?>> clazz = AnnotationLookup.toCoreKey(key);
123143
if (word.remove((Class) clazz) != null) {

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,7 @@
138138
* {@code ...attributes...} are the attributes to change, same as with {@code addDep}
139139
* {@code -morphofeatures ...} will set the features to be exactly as written.
140140
* {@code -updateMorphoFeatures ...} will edit or add the features without overwriting existing features.
141-
* TODO: if anyone needs the ability to remove features without resetting the entire features map,
142-
* please file an issue on github.
141+
* {@code -removeMorphoFeatures ...} will remove only the named morpho feature, leaving any other features on the node untouched.
143142
* {@code -remove ...} will remove the attribute entirely, such as doing {@code -remove lemma} to remove the lemma.
144143
*</p><p>
145144
* {@code lemmatize} will put a lemma on a word.
@@ -230,6 +229,18 @@
230229
addDep -gov antennae -reln dep -word blue
231230
}
232231
</pre>
232+
* Some patterns which leave the node in the same state as before will still bomb because of the way the dirty bit works. For example:
233+
<pre>
234+
{@code
235+
{word:/pattern/;cpos:VERB;morphofeatures:{VerbForm:Inf}}=word
236+
EditNode -node word -remove morphofeatures
237+
EditNode -node word -updatemorphofeatures Aspect=Imp -updatemorphofeatures VerbForm=Inf
238+
}
239+
</pre>
240+
* Here, the end result will be the same after at most one iteration through the loop,
241+
* but {@code -remove morphofeatures} sets the dirty bit, and the dirty bit is not cleared
242+
* when {@code -updatemorphofeatures} puts back the deleted features.
243+
* TODO: this one at least can be fixed
233244
*
234245
* @author Eric Yeh
235246
*/
@@ -420,6 +431,7 @@ public Collection<SsurgeonWordlist> getResources() {
420431
public static final String UPDATE_MORPHO_FEATURES = "-updateMorphoFeatures";
421432
public static final String UPDATE_MORPHO_FEATURES_LOWER = "-updatemorphofeatures";
422433
public static final String REMOVE = "-remove";
434+
public static final String REMOVE_MORPHO_FEATURES = "-removeMorphoFeatures";
423435

424436

425437
// args for Ssurgeon edits, allowing us to not
@@ -454,6 +466,8 @@ protected static class SsurgeonArgs {
454466
public Map<String, String> annotations = new TreeMap<>();
455467

456468
public List<String> remove = new ArrayList<>();
469+
470+
public List<String> removeMorphoFeatures = new ArrayList<>();
457471
}
458472

459473
/**
@@ -537,6 +551,9 @@ private static SsurgeonArgs parseArgsBox(String args, Map<String, String> additi
537551
case REMOVE:
538552
argsBox.remove.add(argsValue);
539553
break;
554+
case REMOVE_MORPHO_FEATURES:
555+
argsBox.removeMorphoFeatures.add(argsValue);
556+
break;
540557
default:
541558
String key = argsKey.substring(1);
542559
Class<? extends CoreAnnotation<?>> annotation = AnnotationLookup.toCoreKey(key);
@@ -602,7 +619,7 @@ public static SsurgeonEdit parseEditLine(String editLine, Map<String, String> at
602619
if (argsBox.nodes.size() != 1) {
603620
throw new SsurgeonParseException("Cannot make an EditNode out of " + argsBox.nodes.size() + " nodes. Please use exactly one -node");
604621
}
605-
return new EditNode(argsBox.nodes.get(0), argsBox.annotations, argsBox.updateMorphoFeatures, argsBox.remove);
622+
return new EditNode(argsBox.nodes.get(0), argsBox.annotations, argsBox.updateMorphoFeatures, argsBox.remove, argsBox.removeMorphoFeatures);
606623
} else if (command.equalsIgnoreCase(Lemmatize.LABEL)) {
607624
if (argsBox.nodes.size() != 1) {
608625
throw new SsurgeonParseException("Cannot make a Lemmatize out of " + argsBox.nodes.size() + " nodes. Please use exactly one -node");

0 commit comments

Comments
 (0)