Skip to content

Commit e455b6f

Browse files
committed
Fix vibes, graffiti, people in the lemmatizer
1 parent c46a760 commit e455b6f

File tree

3 files changed

+73924
-73064
lines changed

3 files changed

+73924
-73064
lines changed

src/edu/stanford/nlp/process/Morpha.flex

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1418,6 +1418,7 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
14181418
<noun,any>"godchildren" { return(stem(3,"","s")); }
14191419
<noun,any>"goings-over" { return(stem(6,"-over","s")); }
14201420
<noun,any>"grandchildren" { return(stem(3,"","s")); }
1421+
<noun,any>"graffiti" { return(stem(1,"o","")); } /* irregular Italian plural; presumably drops the final i and appends o (graffiti -> graffito), by analogy with the neighbouring rules -- deal with it */
14211422
<noun,any>"halves" { return(stem(3,"f","s")); }
14221423
<noun,any>"hangers-on" { return(stem(4,"-on","s")); }
14231424
<noun,any>"helices" { return(stem(3,"x","s")); }
@@ -1835,7 +1836,7 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
18351836
<verb,any>("backpedal"|"bankroll"|"bequeath"|"blackball"|"bottom"|"clang"|"debut"|"doctor"|"eyeball"|"factor"|"imperil"|"landfill"|"margin"|"multihull"|"occur"|"overbill"|"pilot"|"prong"|"pyramid"|"reinstall"|"relabel"|"remodel"|"snowball"|"socall"|"squirrel"|"stonewall"|"wrong"){EDING} { return(semi_reg_stem(0,"")); } /* disprefer */
18361837
<noun,any>("beasti"|"browni"|"cach"|"cadr"|"calori"|"champagn"|"cologn"|"cooki"|"druggi"|"eateri"|"emigr"|"emigre"|"employe"|"freebi"|"genr"|"kiddi"|"massacr"|"mooni"|"neckti"|"nich"|"prairi"|"softi"|"toothpast"|"willi")"es" { return(stem(1,"","s")); }
18371838
<noun,any>(({A}*"phobia")|"accompli"|"aegis"|"alias"|"anorexia"|"anti"|"artemisia"|"ataxia"|"beatlemania"|"blini"|"cafeteria"|"capita"|"cola"|"coli"|"deli"|"dementia"|"downstairs"|"upstairs"|"dyslexia"|"jakes"|"dystopia"|"encyclopedia"|"estancia"|"euphoria"|"euthanasia"|"fracas"|"fuss"|"gala"|"gorilla"|"GI"|"habeas"|"haemophilia"|"hemophilia"|"hoopla"|"hula"|"impatiens"|"informatics"|"intelligentsia"|"jacuzzi"|"kiwi"|"mafia"|"magnolia"|"malaria"|"maquila"|"marginalia"|"megalomania"|"mercedes"|"militia"|"mufti"|"muni"|"olympics"|"pancreas"|"paranoia"|"pastoris"|"pastrami"|"pepperoni"|"pepsi"|"pi"|"piroghi"|"pizzeria"|"pneumocystis"|"potpourri"|"proboscis"|"rabies"|"reggae"|"regimen"|"rigatoni"|"salmonella"|"sarsaparilla"|"semen"|"ski"|"sonata"|"spatula"|"stats"|"subtilis"|"sushi"|"tachyarrhythmia"|"tachycardia"|"tequila"|"tetris"|"thrips"|"timpani"|"tsunami"|"vaccinia"|"vanilla") { return(cnull_stem()); }
1838-
<noun,any>("acrobatics"|"athletics"|"basics"|"betters"|"bifocals"|"bowels"|"briefs"|"checkers"|"cognoscenti"|"denims"|"doldrums"|"dramatics"|"dungarees"|"ergonomics"|"genetics"|"gravitas"|"gymnastics"|"hackles"|"haves"|"hubris"|"ides"|"incidentals"|"ironworks"|"jinks"|"leavings"|"leftovers"|"logistics"|"makings"|"microelectronics"|"miniseries"|"mips"|"mores"|"oodles"|"pajamas"|"pampas"|"panties"|"payola"|"pickings"|"plainclothes"|"pliers"|"ravings"|"reparations"|"rudiments"|"scads"|"splits"|"stays"|"subtitles"|"sunglasss"|"sweepstakes"|"tatters"|"toiletries"|"tongs"|"trivia"|"tweezers"|"vibes"|"waterworks"|"woolens") { return(xnull_stem()); }
1839+
<noun,any>("acrobatics"|"athletics"|"basics"|"betters"|"bifocals"|"bowels"|"briefs"|"checkers"|"cognoscenti"|"denims"|"doldrums"|"dramatics"|"dungarees"|"ergonomics"|"genetics"|"gravitas"|"gymnastics"|"hackles"|"haves"|"hubris"|"ides"|"incidentals"|"ironworks"|"jinks"|"leavings"|"leftovers"|"logistics"|"makings"|"microelectronics"|"miniseries"|"mips"|"mores"|"oodles"|"pajamas"|"pampas"|"panties"|"payola"|"pickings"|"plainclothes"|"pliers"|"ravings"|"reparations"|"rudiments"|"scads"|"splits"|"stays"|"subtitles"|"sunglasss"|"sweepstakes"|"tatters"|"toiletries"|"tongs"|"trivia"|"tweezers"|"waterworks"|"woolens") { return(xnull_stem()); } /* NOTE(review): the entry sunglasss (three s) looks like a typo for sunglasses and is unlikely to match real text -- confirm before changing, since fixing it would alter the lemma returned for sunglasses */
18391840
<noun,any>("biggi"|"bourgeoisi"|"bri"|"camaraderi"|"chinoiseri"|"coteri"|"doggi"|"geni"|"hippi"|"junki"|"lingeri"|"moxi"|"preppi"|"rooki"|"yuppi")"es" { return(stem(1,"","s")); }
18401841
<verb,any>("chor"|"sepulchr"|"silhouett"|"telescop"){ESEDING} { return(semi_reg_stem(0,"e")); }
18411842
<verb,any>("subpena"|"suds"){EDING} { return(semi_reg_stem(0,"")); }
@@ -2092,6 +2093,10 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
20922093
<scan>"gon"/_VBG { return(stem(1, "", "")); } /* luffa: always VBG? */
20932094
<scan>"wan"/_VB { return(stem(0, "t", "")); } /* luffa: could be VB or VBP. hopefully won't conflict with wane */
20942095
<scan>"na"/_TO { return(stem(2, "to", "")); }
2096+
/* peoples_NNS and people_NN may be the collective sense of "people" (a nation or ethnic group) rather than the plural of "person", so only people_NNS is lemmatized to person.
2097+
The {A}* prefix is there to also capture compounds such as salespeople. */
2098+
<scan>{A}*"people"/_NNS { return(stem(5, "erson", "")); } /* presumably drops the final 5 chars (eople) and appends erson: people -> person, salespeople -> salesperson */
2099+
<scan>"ppl"/_NNS { return(stem(3, "person", "")); } /* informal abbreviation of people; presumably drops all 3 chars and substitutes person */
20952100

20962101
<scan>"worse"/_JJR { return(stem(5, "bad", "")); }
20972102
<scan>"worst"/_JJS { return(stem(5, "bad", "")); }

0 commit comments

Comments
 (0)