@@ -49,13 +49,15 @@ import edu.stanford.nlp.util.logging.Redwood;
49
49
/* * A list of verb stems that undergo consonant doubling
50
50
* this list can be rebuilt with the main method in process.Morphology!
51
51
* the verb stem list lives in "/u/nlp/data/morph/verbstem.list"
52
+ *<br>
53
+ * Removed appal and enrol: these words now use the American rather than the British lemma
52
54
*/
53
55
private static final String [] verbStems = { " abat" ,
54
56
" abet" , " abhor" , " abut" , " accur" , " acquit" ,
55
57
" adlib" , " admit" , " aerobat" , " aerosol" , " agendaset" ,
56
- " allot" , " alot" , " anagram" , " annul" , " appal " ,
58
+ " allot" , " alot" , " anagram" , " annul" ,
57
59
" apparel" , " armbar" , " aver" , " babysit" , " airdrop" ,
58
- " appal " , " blackleg" , " bobsled" , " bur" , " chum" ,
60
+ " blackleg" , " bobsled" , " bur" , " chum" ,
59
61
" confab" , " counterplot" , " curet" , " dib" , " backdrop" ,
60
62
" backfil" , " backflip" , " backlog" , " backpedal" , " backslap" ,
61
63
" backstab" , " bag" , " balfun" , " ballot" , " ban" ,
@@ -67,7 +69,7 @@ import edu.stanford.nlp.util.logging.Redwood;
67
69
" bevel" , " bewig" , " bib" , " bid" , " billet" ,
68
70
" bin" , " bip" , " bit" , " bitmap" , " blab" ,
69
71
" blag" , " blam" , " blan" , " blat" , " bles" ,
70
- " blim" , " blip" , " blob" , " bloodlet" , " blot" ,
72
+ " blim" , " blip" , " blob" , " blog " , " bloodlet" , " blot" ,
71
73
" blub" , " blur" , " bob" , " bodypop" , " bog" ,
72
74
" booby-trap" , " boobytrap" , " booksel" , " bootleg" , " bop" ,
73
75
" bot" , " bowel" , " bracket" , " brag" , " brig" ,
@@ -102,13 +104,13 @@ import edu.stanford.nlp.util.logging.Redwood;
102
104
" disembowel" , " dishevel" , " disinter" , " dispel" , " disprefer" ,
103
105
" distil" , " dog" , " dognap" , " don" , " doorstep" ,
104
106
" dot" , " dowel" , " drag" , " drat" , " driftnet" ,
105
- " distil" , " egotrip" , " enrol " , " enthral" , " extol" ,
107
+ " distil" , " egotrip" , " enthral" , " extol" ,
106
108
" fulfil" , " gaffe" , " golliwog" , " idyl" , " inspan" ,
107
109
" drip" , " drivel" , " drop" , " drub" , " drug" ,
108
110
" drum" , " dub" , " duel" , " dun" , " dybbuk" ,
109
111
" earwig" , " eavesdrop" , " ecolabel" , " eitherspigot" , " electroblot" ,
110
112
" embed" , " emit" , " empanel" , " enamel" , " endlabel" ,
111
- " endtrim" , " enrol " , " enthral" , " entrammel" , " entrap" ,
113
+ " endtrim" , " enthral" , " entrammel" , " entrap" ,
112
114
" enwrap" , " equal" , " equip" , " estop" , " exaggerat" ,
113
115
" excel" , " expel" , " extol" , " fag" , " fan" ,
114
116
" farewel" , " fat" , " featherbed" , " feget" , " fet" ,
@@ -277,7 +279,7 @@ import edu.stanford.nlp.util.logging.Redwood;
277
279
" wet" , " wham" , " whet" , " whip" , " whir" ,
278
280
" whiteskin" , " whiz" , " whup" , " wildcat" , " win" ,
279
281
" windmil" , " wit" , " woodchop" , " woodcut" , " wor" ,
280
- " worship" , " wrap" , " will" , " wiretap" , " yen" ,
282
+ " worship" , " wrap" , " will" , " wiretap" , " xfer " , " yen" ,
281
283
" yak" , " yap" , " yarnspin" , " yip" , " yodel" ,
282
284
" zag" , " zap" , " zig" , " zig-zag" , " zigzag" ,
283
285
" zip" , " ztrip" };
@@ -456,7 +458,7 @@ CXY = [bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ]
456
458
CXY2 = "bb" | "cc" | "dd" | "ff" | "gg" | "hh" | "jj" | "kk" | "ll" | "mm" | "nn" | "pp" | "qq" | "rr" | "ss" | "tt" | "vv" | "ww" | "xx" | "zz"
457
459
S2 = "ss" | "zz"
458
460
S = [ sxzSXZ] |( [ csCS] "h" )
459
- PRE = "be" | "ex" | "in" | "mis" | "pre" | "pro" | "re"
461
+ /* Literal word prefixes. Keep blanks outside the quotes ("ex", not " ex"): a blank inside a string literal is matched literally. */
+ PRE = "be" | "de" | "ex" | "in" | "mis" | "pre" | "pro" | "re"
460
462
EDING = "ed" | "ing"
461
463
ESEDING = "es" | "ed" | "ing"
462
464
G = [^ \t\r\n \u2028\u2029\u000B\u000C\u0085 _]
0 commit comments