Skip to content

Commit c46a760

Browse files
committed
update lemmas for gonna, wanna, i, papers, rights
1 parent 9d9b1ae commit c46a760

File tree

3 files changed

+63205
-63162
lines changed

3 files changed

+63205
-63162
lines changed

src/edu/stanford/nlp/process/Morpha.flex

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1824,7 +1824,7 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
18241824
<noun,any>(({A}+"itis")|"abdomen"|"acacia"|"achimenes"|"alibi"|"alkali"|"ammonia"|"amnesia"|"anaesthesia"|"anesthesia"|"aria"|"arris"|"asphyxia"|"aspidistra"|"aubrietia"|"axis"|"begonia"|"bias"|"bikini"|"cannula"|"canvas"|"chili"|"chinchilla"|"Christmas"|"cornucopia"|"cupola"|"cyclamen"|"diabetes"|"diphtheria"|"dysphagia"|"encyclopaedia"|"ennui"|"escallonia"|"ferris"|"flotilla"|"forsythia"|"ganglia"|"gas"|"gondola"|"grata"|"guerrilla"|"haemophilia"|"hysteria"|"inertia"|"insignia"|"iris"|"khaki"|"koala"|"lens"|"macaroni"|"manilla"|"mania"|"mantis"|"martini"|"matins"|"memorabilia"|"metropolis"|"moa"|"morphia"|"nostalgia"|"omen"|"pantometria"|"parabola"|"paraphernalia"|"pastis"|"patella"|"patens"|"pelvis"|"peninsula"|"phantasmagoria"|"pneumonia"|"polyuria"|"portcullis"|"pyrexia"|"regalia"|"safari"|"salami"|"sari"|"saturnalia"|"spaghetti"|"specimen"|"subtopia"|"suburbia"|"syphilis"|"taxi"|"toccata"|"trellis"|"tutti"|"umbrella"|"utopia"|"villa"|"zucchini") { return(cnull_stem()); }
18251825
<noun,any>("acumen"|"Afrikaans"|"aphis"|"brethren"|"caries"|"confetti"|"contretemps"|"dais"|"debris"|"extremis"|"gallows"|"hors"|"hovis"|"hustings"|"innards"|"isosceles"|"maquis"|"minutiae"|"molasses"|"mortis"|"patois"|"pectoris"|"plumbites"|"series"|"tares"|"tennis"|"turps") { return(xnull_stem()); }
18261826
/* not included: seconds, as the time usage is much more common than the food usage, and second might work for food anyway */
1827-
<noun,any>("accoutrements"|"aerodynamics"|"aeronautics"|"aesthetics"|"algae"|"amends"|"annals"|"arrears"|"assizes"|"auspices"|"backwoods"|"bacteria"|"banns"|"battlements"|"bedclothes"|"belongings"|"billiards"|"binoculars"|"bitters"|"blandishments"|"bleachers"|"blinkers"|"blues"|"breeches"|"brussels"|"clothes"|"clutches"|"commons"|"confines"|"contents"|"credentials"|"crossbones"|"damages"|"dealings"|"dentures"|"depths"|"devotions"|"diggings"|"doings"|"downs"|"dues"|"dynamics"|"earnings"|"eatables"|"eaves"|"economics"|"electrodynamics"|"electronics"|"entrails"|"environs"|"equities"|"ethics"|"eugenics"|"filings"|"finances"|"folks"|"footlights"|"fumes"|"furnishings"|"genitals"|"glitterati"|"goggles"|"goods"|"grits"|"groceries"|"grounds"|"handcuffs"|"headquarters"|"histrionics"|"hostilities"|"humanities"|"hydraulics"|"hysterics"|"illuminations"|"italics"|"jeans"|"jitters"|"kinetics"|"knickers"|"latitudes"|"leggings"|"likes"|"linguistics"|"lodgings"|"loggerheads"|"mains"|"manners"|"mathematics"|"means"|"measles"|"media"|"memoirs"|"metaphysics"|"mockers"|"motions"|"multimedia"|"munitions"|"news"|"nutria"|"nylons"|"oats"|"odds"|"oils"|"oilskins"|"optics"|"orthodontics"|"outskirts"|"overalls"|"pants"|"pantaloons"|"papers"|"paras"|"paratroops"|"particulars"|"pediatrics"|"phonemics"|"phonetics"|"physics"|"pincers"|"plastics"|"politics"|"proceeds"|"proceedings"|"prospects"|"pyjamas"|"rations"|"ravages"|"refreshments"|"regards"|"reinforcements"|"remains"|"respects"|"returns"|"riches"|"rights"|"savings"|"scissors"|"semantics"|"shades"|"shallows"|"shambles"|"shorts"|"singles"|"slacks"|"specifics"|"spectacles"|"spoils"|"statics"|"statistics"|"summons"|"supplies"|"surroundings"|"suspenders"|"takings"|"teens"|"telecommunications"|"tenterhooks"|"thanks"|"theatricals"|"thermodynamics"|"tights"|"toils"|"trappings"|"travels"|"troops"|"tropics"|"trousers"|"tweeds"|"underpants"|"vapours"|"vicissitudes"|"vitals"|"wages"|"wanderings"|"wares"|"whereabouts"|"whites"|"winnings"|"withers"|"woollens"|"workings"|"writings"|"yes") { return(xnull_stem()); }
1827+
<noun,any>("accoutrements"|"aerodynamics"|"aeronautics"|"aesthetics"|"algae"|"amends"|"annals"|"arrears"|"assizes"|"auspices"|"backwoods"|"bacteria"|"banns"|"battlements"|"bedclothes"|"belongings"|"billiards"|"binoculars"|"bitters"|"blandishments"|"bleachers"|"blinkers"|"blues"|"breeches"|"brussels"|"clothes"|"clutches"|"commons"|"confines"|"contents"|"credentials"|"crossbones"|"damages"|"dealings"|"dentures"|"depths"|"devotions"|"diggings"|"doings"|"downs"|"dues"|"dynamics"|"earnings"|"eatables"|"eaves"|"economics"|"electrodynamics"|"electronics"|"entrails"|"environs"|"equities"|"ethics"|"eugenics"|"filings"|"finances"|"folks"|"footlights"|"fumes"|"furnishings"|"genitals"|"glitterati"|"goggles"|"goods"|"grits"|"groceries"|"grounds"|"handcuffs"|"headquarters"|"histrionics"|"hostilities"|"humanities"|"hydraulics"|"hysterics"|"illuminations"|"italics"|"jeans"|"jitters"|"kinetics"|"knickers"|"latitudes"|"leggings"|"likes"|"linguistics"|"lodgings"|"loggerheads"|"mains"|"manners"|"mathematics"|"means"|"measles"|"media"|"memoirs"|"metaphysics"|"mockers"|"motions"|"multimedia"|"munitions"|"news"|"nutria"|"nylons"|"oats"|"odds"|"oils"|"oilskins"|"optics"|"orthodontics"|"outskirts"|"overalls"|"pants"|"pantaloons"|"paras"|"paratroops"|"particulars"|"pediatrics"|"phonemics"|"phonetics"|"physics"|"pincers"|"plastics"|"politics"|"proceeds"|"proceedings"|"prospects"|"pyjamas"|"rations"|"ravages"|"refreshments"|"regards"|"reinforcements"|"remains"|"respects"|"returns"|"riches"|"savings"|"scissors"|"semantics"|"shades"|"shallows"|"shambles"|"shorts"|"singles"|"slacks"|"specifics"|"spectacles"|"spoils"|"statics"|"statistics"|"summons"|"supplies"|"surroundings"|"suspenders"|"takings"|"teens"|"telecommunications"|"tenterhooks"|"thanks"|"theatricals"|"thermodynamics"|"tights"|"toils"|"trappings"|"travels"|"troops"|"tropics"|"trousers"|"tweeds"|"underpants"|"vapours"|"vicissitudes"|"vitals"|"wages"|"wanderings"|"wares"|"whereabouts"|"whites"|"winnings"|"withers"|"woollens"|"workings"|"writings"|"yes") { return(xnull_stem()); }
18281828
<noun,any>("boati"|"bonhomi"|"clippi"|"creepi"|"deari"|"droppi"|"gendarmeri"|"girli"|"goali"|"haddi"|"kooki"|"kyri"|"lambi"|"lassi"|"mari"|"menageri"|"petti"|"reveri"|"snotti"|"sweeti")"es" { return(stem(1,"","s")); }
18291829
<verb,any>("buffet"|"plummet")"t"{EDING} { return(semi_reg_stem(1,"")); }
18301830
<verb,any>"gunsling" { return(cnull_stem()); }
@@ -2076,19 +2076,22 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
20762076
<scan>"them"/_P { return(stem(1,"y","")); }
20772077
<scan>"me"/_P { return(stem(2,"I","")); }
20782078
<scan>"us"/_P { return(stem(2,"we","")); }
2079-
<scan>"I"/_P { return(proper_name_stem()); }
2079+
<scan>"I"/_P { return(proper_name_stem()); }
2080+
<scan>"i"/_PRP { return(stem(1,"I","")); }
20802081
<scan>"their"/_P { return(stem(2,"y","")); }
20812082
<scan>"my"/_P { return(stem(1,"y","")); }
20822083
<scan>"your"/_P { return(stem(1,"","")); }
20832084
<scan>"his"/_P { return(stem(2,"e","")); }
20842085
<scan>"our"/_P { return(stem(3,"we","")); }
20852086
<scan>"me"/_P { return(stem(2,"I","")); }
20862087
<scan>"us"/_P { return(stem(2,"we","")); }
2087-
<scan>"I"/_P { return(proper_name_stem()); }
20882088
<scan>"an"/_[AD] { return(stem(1, "", "n")); }
20892089
<scan>"those"/_DT { return(stem(3, "at", "")); }
20902090
<scan>"these"/_DT { return(stem(3, "is", "")); }
20912091
<scan>"dat"/_DT { return(stem(3, "that", "")); }
2092+
<scan>"gon"/_VBG { return(stem(1, "", "")); } /* luffa: always VBG? */
2093+
<scan>"wan"/_VB { return(stem(0, "t", "")); } /* luffa: could be VB or VBP. hopefully won't conflict with wane */
2094+
<scan>"na"/_TO { return(stem(2, "to", "")); }
20922095

20932096
<scan>"worse"/_JJR { return(stem(5, "bad", "")); }
20942097
<scan>"worst"/_JJS { return(stem(5, "bad", "")); }

0 commit comments

Comments
 (0)