Skip to content

Commit 0269a15

Browse files
committed
Add a few more singular form demonyms and include them as possible JJ as well
1 parent e455b6f commit 0269a15

File tree

3 files changed

+73982
-74112
lines changed

3 files changed

+73982
-74112
lines changed

src/edu/stanford/nlp/process/Morpha.flex

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,8 @@ EY_ADJS = "cag"|"cak"|"clay"|"cliqu"|"crep"|"dic"|"dop"|"glu"|"goo"|"grip"|"hok"
474474

475475
/* Macro matching any one of the four tag strings "JJR", "JJS", "RBR" or "RBS"
   (presumably the comparative/superlative adjective and adverb POS tags - confirm against the tagset in use). */
COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
476476

477+
/* Macro listing capitalised nationality / ethnicity / language words (e.g. "Chinese", "Swiss", "Irish").
   It is referenced by a <scan> rule that passes these words to capitalise(xnull_stem()) when they are
   followed by an _NN, _NNP, _NNS, _NNPS or _JJ tag.
   NOTE(review): presumably these are forms that must not lose a trailing "s"/"ese"/"ish" - confirm against xnull_stem(). */
SINGULAR_DEMONYMS = "Bantu"|"Bengalese"|"Beninese"|"British"|"Boche"|"Burmese"|"Chinese"|"Congolese"|"Danish"|"English"|"Finnish"|"Flemish"|"Gabonese"|"Guyanese"|"Irish"|"Japanese"|"Javanese"|"Jewish"|"Lebanese"|"Maltese"|"Moorish"|"Nepalese"|"Nipponese"|"Polish"|"Portuguese"|"Rhenish"|"Romish"|"Scottish"|"Senegalese"|"Siamese"|"Singhalese"|"Sinhalese"|"Sioux"|"Spanish"|"Sudanese"|"Swedish"|"Swiss"|"Taiwanese"|"Togolese"|"Turkish"|"Vietnamese"|"Yiddish"
478+
477479
%%
478480

479481
/* "can" and "will" are not always modals, so they can be inflected */
@@ -1160,7 +1162,7 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
11601162
/* noun/any states: a listed word ending in "man" followed by "s" (e.g. "Germans").
   Calls stem(1,"","s"); judging by the other stem() calls in this file, that presumably drops the
   final character and appends nothing, yielding the listed singular - confirm against stem(). */
<noun,any>("Brahman"|"German"|"dragoman"|"ottoman"|"shaman"|"talisman"|"Norman"|"Pullman"|"Roman")"s" { return(stem(1,"","s")); }
11611163
/* noun/any states: a listed noun followed by "s" (e.g. "Czechs", "taxis", "stomachs").
   Calls stem(1,"","s"), which presumably removes only the final "s" - confirm against stem(). */
<noun,any>("Czech"|"diptych"|"Sassenach"|"abdomen"|"alibi"|"aria"|"bandit"|"begonia"|"bikini"|"caryatid"|"colon"|"cornucopia"|"cromlech"|"cupola"|"dryad"|"eisteddfod"|"encyclopaedia"|"epoch"|"eunuch"|"flotilla"|"gardenia"|"gestalt"|"gondola"|"hierarch"|"hose"|"impediment"|"koala"|"loch"|"mania"|"manservant"|"martini"|"matriarch"|"monarch"|"oligarch"|"omen"|"parabola"|"pastorale"|"patriarch"|"pea"|"peninsula"|"pfennig"|"phantasmagoria"|"pibroch"|"poly"|"real"|"safari"|"sari"|"specimen"|"standby"|"stomach"|"swami"|"taxi"|"tech"|"toccata"|"triptych"|"villa"|"yogi"|"zloty")"s" { return(stem(1,"","s")); }
11621164
/* noun/any states: a listed prefix followed by "ums" (e.g. "asylums", "forums").
   Calls stem(1,"","s"), which presumably removes only the final "s" so the "-um" ending is kept - confirm against stem(). */
<noun,any>("asyl"|"sanct"|"rect"|"pl"|"pendul"|"mausole"|"hoodl"|"for")"ums" { return(stem(1,"","s")); }
1163-
<noun,any>("Bantu"|"Bengalese"|"Beninese"|"Boche"|"Burmese"|"Chinese"|"Congolese"|"Gabonese"|"Guyanese"|"Japanese"|"Javanese"|"Lebanese"|"Maltese"|"Olympics"|"Portuguese"|"Senegalese"|"Siamese"|"Singhalese"|"Sinhalese"|"Sioux"|"Sudanese"|"Swiss"|"Taiwanese"|"Togolese"|"Vietnamese"|"aircraft"|"anopheles"|"apparatus"|"asparagus"|"barracks"|"bellows"|"bison"|"bluefish"|"bob"|"bourgeois"|"bream"|"brill"|"butterfingers"|"carp"|"catfish"|"chassis"|"chub"|"cod"|"codfish"|"coley"|"contretemps"|"corps"|"crawfish"|"crayfish"|"crossroads"|"cuttlefish"|"dace"|"dice"|"dogfish"|"doings"|"dory"|"downstairs"|"eldest"|"feces"|"finnan"|"firstborn"|"fish"|"flatfish"|"flounder"|"fowl"|"fry"|"fries"|{A}+"-works"|"gasworks"|"glassworks"|"globefish"|"goldfish"|"grand"|"gudgeon"|"gulden"|"haddock"|"hake"|"halibut"|"headquarters"|"herring"|"hertz"|"horsepower"|"hovercraft"|"hundredweight"|"ironworks"|"jackanapes"|"kilohertz"|"kurus"|"kwacha"|"ling"|"lungfish"|"mackerel"|"means"|"megahertz"|"moorfowl"|"moorgame"|"mullet"|"offspring"|"pampas"|"parr"|"patois"|"pekinese"|"penn'orth"|"perch"|"pickerel"|"pike"|"pince-nez"|"plaice"|"precis"|"quid"|"rand"|"rendezvous"|"revers"|"roach"|"roux"|"salmon"|"samurai"|"series"|"shad"|"sheep"|"shellfish"|"smelt"|"spacecraft"|"species"|"starfish"|"stockfish"|"sunfish"|"superficies"|"sweepstakes"|"swordfish"|"tench"|"tope"|"triceps"|"trout"|"tuna"|"tunafish"|"tunny"|"turbot"|"undersigned"|"veg"|"waterfowl"|"waterworks"|"waxworks"|"whiting"|"wildfowl"|"woodworm"|"yen") { return(xnull_stem()); }
1165+
/* noun/any states: listed words (e.g. "sheep", "aircraft", "series") plus any {A}+ run followed by "-works"
   are handed to xnull_stem() with no suffix pattern of their own.
   NOTE(review): xnull_stem() and the {A} macro are defined outside this excerpt; presumably xnull_stem()
   returns the token without removing an ending - confirm. */
<noun,any>("aircraft"|"anopheles"|"apparatus"|"asparagus"|"barracks"|"bellows"|"bison"|"bluefish"|"bob"|"bourgeois"|"bream"|"brill"|"butterfingers"|"carp"|"catfish"|"chassis"|"chub"|"cod"|"codfish"|"coley"|"contretemps"|"corps"|"crawfish"|"crayfish"|"crossroads"|"cuttlefish"|"dace"|"dice"|"dogfish"|"doings"|"dory"|"downstairs"|"eldest"|"feces"|"finnan"|"firstborn"|"fish"|"flatfish"|"flounder"|"fowl"|"fry"|"fries"|{A}+"-works"|"gasworks"|"glassworks"|"globefish"|"goldfish"|"grand"|"gudgeon"|"gulden"|"haddock"|"hake"|"halibut"|"headquarters"|"herring"|"hertz"|"horsepower"|"hovercraft"|"hundredweight"|"ironworks"|"jackanapes"|"kilohertz"|"kurus"|"kwacha"|"ling"|"lungfish"|"mackerel"|"means"|"megahertz"|"moorfowl"|"moorgame"|"mullet"|"offspring"|"pampas"|"parr"|"patois"|"pekinese"|"penn'orth"|"perch"|"pickerel"|"pike"|"pince-nez"|"plaice"|"precis"|"quid"|"rand"|"rendezvous"|"revers"|"roach"|"roux"|"salmon"|"samurai"|"series"|"shad"|"sheep"|"shellfish"|"smelt"|"spacecraft"|"species"|"starfish"|"stockfish"|"sunfish"|"superficies"|"sweepstakes"|"swordfish"|"tench"|"tope"|"triceps"|"trout"|"tuna"|"tunafish"|"tunny"|"turbot"|"undersigned"|"veg"|"waterfowl"|"waterworks"|"waxworks"|"whiting"|"wildfowl"|"woodworm"|"yen") { return(xnull_stem()); }
11641166
/* noun/any states: the exact word "Aries". Calls stem(1,"s","s"), which presumably swaps the final
   character for "s" so the word comes back spelled the same - confirm against stem(). */
<noun,any>"Aries" { return(stem(1,"s","s")); }
11651167
/* noun/any states: the exact word "Pisces". Calls stem(1,"s","s"), which presumably swaps the final
   character for "s" so the word comes back spelled the same - confirm against stem(). */
<noun,any>"Pisces" { return(stem(1,"s","s")); }
11661168
/* noun/any states: the exact word "Bengali". Calls stem(1,"i","s"), which presumably swaps the final
   character for "i" so the word comes back spelled the same - confirm against stem(). */
<noun,any>"Bengali" { return(stem(1,"i","s")); }
@@ -1835,7 +1837,7 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
18351837
/* verb/any states: a listed verb prefix (e.g. "clon", "deplet") followed by the {ESEDING} macro.
   Calls semi_reg_stem(0,"e").
   NOTE(review): {ESEDING} and semi_reg_stem() are defined outside this excerpt; presumably the ending is
   removed and "e" is appended (e.g. "cloning" to "clone") - confirm. */
<verb,any>("accret"|"clon"|"deplet"|"dethron"|"dup"|"excret"|"expedit"|"extradit"|"fet"|"finetun"|"gor"|"hing"|"massacr"|"obsolet"|"reconven"|"recreat"|"recus"|"reignit"|"swip"|"videotap"|"zon"){ESEDING} { return(semi_reg_stem(0,"e")); }
18361838
/* verb/any states: a listed verb (e.g. "occur", "pilot") followed by the {EDING} macro.
   Calls semi_reg_stem(0,"") - an empty second argument, unlike the "e" passed by the neighbouring verb rules.
   NOTE(review): {EDING} and semi_reg_stem() are defined outside this excerpt; presumably the ending is
   removed and nothing is appended - confirm. */
<verb,any>("backpedal"|"bankroll"|"bequeath"|"blackball"|"bottom"|"clang"|"debut"|"doctor"|"eyeball"|"factor"|"imperil"|"landfill"|"margin"|"multihull"|"occur"|"overbill"|"pilot"|"prong"|"pyramid"|"reinstall"|"relabel"|"remodel"|"snowball"|"socall"|"squirrel"|"stonewall"|"wrong"){EDING} { return(semi_reg_stem(0,"")); } /* disprefer */
18371839
/* noun/any states: a listed prefix followed by "es" (e.g. "cookies", "calories", "niches").
   Calls stem(1,"","s"), which presumably removes only the final "s" so the "e" stays on the
   result ("cookie") - confirm against stem(). */
<noun,any>("beasti"|"browni"|"cach"|"cadr"|"calori"|"champagn"|"cologn"|"cooki"|"druggi"|"eateri"|"emigr"|"emigre"|"employe"|"freebi"|"genr"|"kiddi"|"massacr"|"mooni"|"neckti"|"nich"|"prairi"|"softi"|"toothpast"|"willi")"es" { return(stem(1,"","s")); }
1838-
<noun,any>(({A}*"phobia")|"accompli"|"aegis"|"alias"|"anorexia"|"anti"|"artemisia"|"ataxia"|"beatlemania"|"blini"|"cafeteria"|"capita"|"cola"|"coli"|"deli"|"dementia"|"downstairs"|"upstairs"|"dyslexia"|"jakes"|"dystopia"|"encyclopedia"|"estancia"|"euphoria"|"euthanasia"|"fracas"|"fuss"|"gala"|"gorilla"|"GI"|"habeas"|"haemophilia"|"hemophilia"|"hoopla"|"hula"|"impatiens"|"informatics"|"intelligentsia"|"jacuzzi"|"kiwi"|"mafia"|"magnolia"|"malaria"|"maquila"|"marginalia"|"megalomania"|"mercedes"|"militia"|"mufti"|"muni"|"olympics"|"pancreas"|"paranoia"|"pastoris"|"pastrami"|"pepperoni"|"pepsi"|"pi"|"piroghi"|"pizzeria"|"pneumocystis"|"potpourri"|"proboscis"|"rabies"|"reggae"|"regimen"|"rigatoni"|"salmonella"|"sarsaparilla"|"semen"|"ski"|"sonata"|"spatula"|"stats"|"subtilis"|"sushi"|"tachyarrhythmia"|"tachycardia"|"tequila"|"tetris"|"thrips"|"timpani"|"tsunami"|"vaccinia"|"vanilla") { return(cnull_stem()); }
1840+
/* noun/any states: any {A}* run followed by "phobia", or one of the listed words (e.g. "alias", "sushi",
   "rabies"), is handed to cnull_stem().
   NOTE(review): cnull_stem() and the {A} macro are defined outside this excerpt; how cnull_stem() differs
   from xnull_stem() cannot be told from here - confirm. */
<noun,any>(({A}*"phobia")|"accompli"|"aegis"|"alias"|"anorexia"|"anti"|"artemisia"|"ataxia"|"beatlemania"|"blini"|"cafeteria"|"capita"|"cola"|"coli"|"deli"|"dementia"|"downstairs"|"upstairs"|"dyslexia"|"jakes"|"dystopia"|"encyclopedia"|"estancia"|"euphoria"|"euthanasia"|"fracas"|"fuss"|"gala"|"gorilla"|"GI"|"habeas"|"haemophilia"|"hemophilia"|"hoopla"|"hula"|"impatiens"|"informatics"|"intelligentsia"|"jacuzzi"|"kiwi"|"mafia"|"magnolia"|"malaria"|"maquila"|"marginalia"|"megalomania"|"mercedes"|"militia"|"mufti"|"muni"|"pancreas"|"paranoia"|"pastoris"|"pastrami"|"pepperoni"|"pepsi"|"pi"|"piroghi"|"pizzeria"|"pneumocystis"|"potpourri"|"proboscis"|"rabies"|"reggae"|"regimen"|"rigatoni"|"salmonella"|"sarsaparilla"|"semen"|"ski"|"sonata"|"spatula"|"stats"|"subtilis"|"sushi"|"tachyarrhythmia"|"tachycardia"|"tequila"|"tetris"|"thrips"|"timpani"|"tsunami"|"vaccinia"|"vanilla") { return(cnull_stem()); }
18391841
/* noun/any states: listed nouns that are normally used only in a plural-looking form (e.g. "pliers",
   "tweezers", "pajamas", "sunglasses") are handed to xnull_stem() with no suffix pattern of their own.
   The entry "sunglasses" was previously misspelled "sunglasss", which could never match real text.
   NOTE(review): xnull_stem() is defined outside this excerpt; presumably it returns the token without
   removing an ending - confirm. */
<noun,any>("acrobatics"|"athletics"|"basics"|"betters"|"bifocals"|"bowels"|"briefs"|"checkers"|"cognoscenti"|"denims"|"doldrums"|"dramatics"|"dungarees"|"ergonomics"|"genetics"|"gravitas"|"gymnastics"|"hackles"|"haves"|"hubris"|"ides"|"incidentals"|"ironworks"|"jinks"|"leavings"|"leftovers"|"logistics"|"makings"|"microelectronics"|"miniseries"|"mips"|"mores"|"oodles"|"pajamas"|"pampas"|"panties"|"payola"|"pickings"|"plainclothes"|"pliers"|"ravings"|"reparations"|"rudiments"|"scads"|"splits"|"stays"|"subtitles"|"sunglasses"|"sweepstakes"|"tatters"|"toiletries"|"tongs"|"trivia"|"tweezers"|"waterworks"|"woolens") { return(xnull_stem()); }
18401842
/* noun/any states: a listed prefix ending in "i" followed by "es" (e.g. "hippies", "rookies", "yuppies").
   Calls stem(1,"","s"), which presumably removes only the final "s", leaving the "-ie" spelling
   ("hippie") - confirm against stem(). */
<noun,any>("biggi"|"bourgeoisi"|"bri"|"camaraderi"|"chinoiseri"|"coteri"|"doggi"|"geni"|"hippi"|"junki"|"lingeri"|"moxi"|"preppi"|"rooki"|"yuppi")"es" { return(stem(1,"","s")); }
18411843
/* verb/any states: a listed verb prefix ("chor", "sepulchr", "silhouett", "telescop") followed by the
   {ESEDING} macro. Calls semi_reg_stem(0,"e").
   NOTE(review): {ESEDING} and semi_reg_stem() are defined outside this excerpt; presumably the ending is
   removed and "e" is appended (e.g. "telescoping" to "telescope") - confirm. */
<verb,any>("chor"|"sepulchr"|"silhouett"|"telescop"){ESEDING} { return(semi_reg_stem(0,"e")); }
@@ -2098,6 +2100,9 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
20982100
/* scan state: any {A}* run ending in "people", matched only when followed by "_NNS" (trailing context).
   Calls stem(5, "erson", ""); with 5 equal to the length of "eople", this presumably replaces that
   ending with "erson", giving "...person" - confirm against stem(). */
<scan>{A}*"people"/_NNS { return(stem(5, "erson", "")); }
20992101
/* scan state: the abbreviation "ppl", matched only when followed by "_NNS" (trailing context).
   Calls stem(3, "person", ""); with 3 equal to the length of "ppl", this presumably replaces the
   whole token with "person" - confirm against stem(). */
<scan>"ppl"/_NNS { return(stem(3, "person", "")); }
21002102

2103+
/* scan state: the capitalised word "Olympics", matched only when followed by "_NN" with an optional "P"
   and an optional "S" (i.e. _NN, _NNP, _NNS or _NNPS). Returns capitalise(xnull_stem()).
   Only the capitalised spelling is listed here.
   NOTE(review): capitalise() and xnull_stem() are defined outside this excerpt; presumably the word is
   returned without its final "s" being stripped - confirm. */
<scan>"Olympics"/_NN(P?)(S?) { return(capitalise(xnull_stem())); }
2104+
/* scan state: any word from the SINGULAR_DEMONYMS macro, matched only when followed by "_" and then either
   "NN" with an optional "P" and optional "S", or "JJ" (trailing context). Returns capitalise(xnull_stem()).
   NOTE(review): capitalise() and xnull_stem() are defined outside this excerpt; presumably the word is
   returned without an ending being stripped - confirm. */
<scan>{SINGULAR_DEMONYMS}/_(NN(P?)(S?)|JJ) { return(capitalise(xnull_stem())); }
2105+
21012106
/* scan state: "worse" followed by "_JJR" (trailing context). Calls stem(5, "bad", ""); with 5 equal to
   the length of "worse", this presumably replaces the whole token with "bad" - confirm against stem(). */
<scan>"worse"/_JJR { return(stem(5, "bad", "")); }
21022107
/* scan state: "worst" followed by "_JJS" (trailing context). Calls stem(5, "bad", ""); with 5 equal to
   the length of "worst", this presumably replaces the whole token with "bad" - confirm against stem(). */
<scan>"worst"/_JJS { return(stem(5, "bad", "")); }
21032108
/* scan state: "worse" followed by "_RBR" (trailing context). Calls stem(5, "badly", ""); with 5 equal to
   the length of "worse", this presumably replaces the whole token with "badly" - confirm against stem(). */
<scan>"worse"/_RBR { return(stem(5, "badly", "")); }

0 commit comments

Comments
 (0)