Skip to content

Commit 0fba443

Browse files
committed
add Polynesian and fix supplies
1 parent 56362e9 commit 0fba443

File tree

2 files changed

+70902
-70931
lines changed

2 files changed

+70902
-70931
lines changed

src/edu/stanford/nlp/process/Morpha.flex

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ EY_ADJS = "cag"|"cak"|"clay"|"cliqu"|"crep"|"dic"|"dop"|"glu"|"goo"|"grip"|"hok"
475475
COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
476476

477477
SINGULAR_DEMONYMS = "Bantu"|"Bengalese"|"Beninese"|"Borg"|"British"|"Boche"|"Burmese"|"Chinese"|"Congolese"|"Danish"|"English"|"Ferengi"|"Finnish"|"Flemish"|"French"|"Gabonese"|"Guyanese"|"Irish"|"Japanese"|"Javanese"|"Jewish"|"Jóola"|"Lebanese"|"Maltese"|"Moorish"|"Nepalese"|"Nipponese"|"Polish"|"Portuguese"|"Rhenish"|"Romish"|"Scottish"|"Senegalese"|"Siamese"|"Singhalese"|"Sinhalese"|"Sioux"|"Spanish"|"Sudanese"|"Swedish"|"Swiss"|"Taiwanese"|"Togolese"|"Turkish"|"Vietnamese"|"Welsh"|"Yiddish"
478-
S_ENDING_DEMONYMS = "Afghan"|"Afghani"|"African"|"Albanian"|"Alexandrine"|"Algerian"|"Alsatian"|"American"|"Amharic"|"Andorian"|"Andorran"|"Anglican"|"Angolan"|"Anguillan"|"Antiguan"|"Arab"|"Arabian"|"Arabic"|"Arcadian"|"Argentinian"|"Aristotelian"|"Aryan"|"Asian"|"Asiatic"|"Attic"|"Augustan"|"Australian"|"Austrian"|"Ba\'athist"|"Baathist"|"Baghdadi"|"Bahamiam"|"Bahamian"|"Bangladeshi"|"Barbadian"|"Belgian"|"Bengali"|"Bermudan"|"Bolian"|"Bolivian"|"Brazilian"|"Britannic"|"Bruneian"|"Bulgarian"|"Burundian"|"Caesarian"|"Californian"|"Cambodian"|"Cameroonian"|"Canadian"|"Catalan"|"Caucasian"|"Celtic"|"Cesarean"|"Chadian"|"Chilean"|"Christian"|"Colombian"|"Confucian"|"Copernican"|"Coptic"|"Corinthian"|"Cuban"|"Cyclopean"|"Cyprian"|"Cyrillic"|"Czech"|"Czechoslovakian"|"Delphic"|"Democrat"|"Djiboutian"|"Dominican"|"Doric"|"Ecuadorian"|"Egyptian"|"Elizabethan"|"Elysian"|"Eritrean"|"Ethiopian"|"Euclidean"|"Eurasian"|"European"|"Eustachian"|"Evangelical"|"Fabian"|"Fallopian"|"Fijian"|"Filipino"|"Firang"|"Franciscan"|"Freudian"|"Gaelic"|"Gallic"|"Gambian"|"Georgian"|"German"|"Germanic"|"Ghanaian"|"Gibraltarian"|"Gilbertian"|"Gordian"|"Gothic"|"Grecian"|"Greek"|"Gregorian"|"Grenadian"|"Guatemalan"|"Guinean"|"Haitian"|"Hebraic"|"Hellenic"|"Hertzian"|"Hippocratic"|"Hispanic"|"Homeric"|"Honduran"|"Hungarian"|"Husker"|"Icelandic"|"Indian"|"Indonesian"|"Ionic"|"Iranian"|"Iraqi"|"Islamic"|"Israeli"|"Italian"|"Jacobean"|"Jamaican"|"Jordanian"|"Judaic"|"Julian"|"Kampuchean"|"Kenyan"|"Korean"|"Kurd"|"Laotian"|"Liberian"|"Libertarian"|"Libyan"|"Lilliputian"|"Lutheran"|"Macedonian"|"Madagascan"|"Malawian"|"Malayan"|"Malaysian"|"Malian"|"Malthusian"|"Martian"|"Masonic"|"Mauritanian"|"Mauritian"|"Mediterranean"|"Mendelian"|"Mephistophelian"|"Mexican"|"Mohammedan"|"Mongolian"|"Montserratian"|"Moroccan"|"Mosaic"|"Moslem"|"Mozambican"|"Muhammadan"|"Muslim"|"Namibian"|"Napoleonic"|"Nauruan"|"Nazi"|"Neapolitan"|"Newtonian"|"Nicaraguan"|"Nigerian"|"Nilotic"|"Nordic"|"Norman"|"Norwegian"|"Olympian"|"Olympic"|"Oxonian"|"Pakistani"|"Palestinian"|"Panamanian"|"Papuan"|"Paraguayan"|"Parisian"|"Parmesan"|"Parthian"|"Pashtun"|"Persian"|"Peruvian"|"Platonic"|"Presbyterian"|"Protestant"|"Prussian"|"Punic"|"Pyrrhic"|"Rabelaisian"|"Ramadan"|"Republican"|"Romaic"|"Roman"|"Romanian"|"Rotarian"|"Ruritanian"|"Russian"|"Rwandan"|"Sabahan"|"Saddamite"|"Sadrist"|"Salafi"|"Salvadorean"|"Samaritan"|"Samoan"|"Sapphic"|"Sarawakian"|"Satanic"|"Saudi"|"Scandinavian"|"Semitic"|"Shakespearian"|"Shavian"|"Shiite"|"Siberian"|"Sicilian"|"Singaporean"|"Sinhala"|"Slavonic"|"Slovenian"|"Socratic"|"Somali"|"Somalian"|"Spartan"|"Stygian"|"Sumatran"|"Sunni"|"Syrian"|"Tahitian"|"Tamil"|"Tanzanian"|"Terpsichorean"|"Teutonic"|"Texan"|"Thai"|"Thespian"|"Tibetan"|"Tobagonian"|"Tongan"|"Trinidadian"|"Trojan"|"Tunisian"|"Ugandan"|"Unitarian"|"Uruguayan"|"Utopian"|"Vatican"|"Venetian"|"Venezuelan"|"Victorian"|"Vulcan"|"Wahhabi"|"Wesleyan"|"Westerner"|"Yugoslavian"|"Zairean"|"Zambian"|"Zimbabwean"|"Zionist"
478+
S_ENDING_DEMONYMS = "Afghan"|"Afghani"|"African"|"Albanian"|"Alexandrine"|"Algerian"|"Alsatian"|"American"|"Amharic"|"Andorian"|"Andorran"|"Anglican"|"Angolan"|"Anguillan"|"Antiguan"|"Arab"|"Arabian"|"Arabic"|"Arcadian"|"Argentinian"|"Aristotelian"|"Aryan"|"Asian"|"Asiatic"|"Attic"|"Augustan"|"Australian"|"Austrian"|"Ba\'athist"|"Baathist"|"Baghdadi"|"Bahamiam"|"Bahamian"|"Bangladeshi"|"Barbadian"|"Belgian"|"Bengali"|"Bermudan"|"Bolian"|"Bolivian"|"Brazilian"|"Britannic"|"Bruneian"|"Bulgarian"|"Burundian"|"Caesarian"|"Californian"|"Cambodian"|"Cameroonian"|"Canadian"|"Catalan"|"Caucasian"|"Celtic"|"Cesarean"|"Chadian"|"Chilean"|"Christian"|"Colombian"|"Confucian"|"Copernican"|"Coptic"|"Corinthian"|"Cuban"|"Cyclopean"|"Cyprian"|"Cyrillic"|"Czech"|"Czechoslovakian"|"Delphic"|"Democrat"|"Djiboutian"|"Dominican"|"Doric"|"Ecuadorian"|"Egyptian"|"Elizabethan"|"Elysian"|"Eritrean"|"Ethiopian"|"Euclidean"|"Eurasian"|"European"|"Eustachian"|"Evangelical"|"Fabian"|"Fallopian"|"Fijian"|"Filipino"|"Firang"|"Franciscan"|"Freudian"|"Gaelic"|"Gallic"|"Gambian"|"Georgian"|"German"|"Germanic"|"Ghanaian"|"Gibraltarian"|"Gilbertian"|"Gordian"|"Gothic"|"Grecian"|"Greek"|"Gregorian"|"Grenadian"|"Guatemalan"|"Guinean"|"Haitian"|"Hebraic"|"Hellenic"|"Hertzian"|"Hippocratic"|"Hispanic"|"Homeric"|"Honduran"|"Hungarian"|"Husker"|"Icelandic"|"Indian"|"Indonesian"|"Ionic"|"Iranian"|"Iraqi"|"Islamic"|"Israeli"|"Italian"|"Jacobean"|"Jamaican"|"Jordanian"|"Judaic"|"Julian"|"Kampuchean"|"Kenyan"|"Korean"|"Kurd"|"Laotian"|"Liberian"|"Libertarian"|"Libyan"|"Lilliputian"|"Lutheran"|"Macedonian"|"Madagascan"|"Malawian"|"Malayan"|"Malaysian"|"Malian"|"Malthusian"|"Martian"|"Masonic"|"Mauritanian"|"Mauritian"|"Mediterranean"|"Mendelian"|"Mephistophelian"|"Mexican"|"Mohammedan"|"Mongolian"|"Montserratian"|"Moroccan"|"Mosaic"|"Moslem"|"Mozambican"|"Muhammadan"|"Muslim"|"Namibian"|"Napoleonic"|"Nauruan"|"Nazi"|"Neapolitan"|"Newtonian"|"Nicaraguan"|"Nigerian"|"Nilotic"|"Nordic"|"Norman"|"Norwegian"|"Olympian"|"Olympic"|"Oxonian"|"Pakistani"|"Palestinian"|"Panamanian"|"Papuan"|"Paraguayan"|"Parisian"|"Parmesan"|"Parthian"|"Pashtun"|"Persian"|"Peruvian"|"Platonic"|"Polynesian"|"Presbyterian"|"Protestant"|"Prussian"|"Punic"|"Pyrrhic"|"Rabelaisian"|"Ramadan"|"Republican"|"Romaic"|"Roman"|"Romanian"|"Rotarian"|"Ruritanian"|"Russian"|"Rwandan"|"Sabahan"|"Saddamite"|"Sadrist"|"Salafi"|"Salvadorean"|"Samaritan"|"Samoan"|"Sapphic"|"Sarawakian"|"Satanic"|"Saudi"|"Scandinavian"|"Semitic"|"Shakespearian"|"Shavian"|"Shiite"|"Siberian"|"Sicilian"|"Singaporean"|"Sinhala"|"Slavonic"|"Slovenian"|"Socratic"|"Somali"|"Somalian"|"Spartan"|"Stygian"|"Sumatran"|"Sunni"|"Syrian"|"Tahitian"|"Tamil"|"Tanzanian"|"Terpsichorean"|"Teutonic"|"Texan"|"Thai"|"Thespian"|"Tibetan"|"Tobagonian"|"Tongan"|"Trinidadian"|"Trojan"|"Tunisian"|"Ugandan"|"Unitarian"|"Uruguayan"|"Utopian"|"Vatican"|"Venetian"|"Venezuelan"|"Victorian"|"Vulcan"|"Wahhabi"|"Wesleyan"|"Westerner"|"Yugoslavian"|"Zairean"|"Zambian"|"Zimbabwean"|"Zionist"
479479

480480
%%
481481

@@ -1828,7 +1828,7 @@ S_ENDING_DEMONYMS = "Afghan"|"Afghani"|"African"|"Albanian"|"Alexandrine"|"Alger
18281828
<noun,any>(({A}+"itis")|"abdomen"|"acacia"|"achimenes"|"alibi"|"alkali"|"ammonia"|"amnesia"|"anaesthesia"|"anesthesia"|"aria"|"arris"|"asphyxia"|"aspidistra"|"aubrietia"|"axis"|"begonia"|"bias"|"bikini"|"cannula"|"canvas"|"chili"|"chinchilla"|"Christmas"|"cornucopia"|"cupola"|"cyclamen"|"diabetes"|"diphtheria"|"dysphagia"|"encyclopaedia"|"ennui"|"escallonia"|"ferris"|"flotilla"|"forsythia"|"ganglia"|"gas"|"gondola"|"grata"|"guerrilla"|"haemophilia"|"hysteria"|"inertia"|"insignia"|"iris"|"khaki"|"koala"|"lens"|"macaroni"|"manilla"|"mania"|"mantis"|"martini"|"matins"|"memorabilia"|"metropolis"|"moa"|"morphia"|"nostalgia"|"omen"|"pantometria"|"parabola"|"paraphernalia"|"pastis"|"patella"|"patens"|"pelvis"|"peninsula"|"phantasmagoria"|"pneumonia"|"polyuria"|"portcullis"|"pyrexia"|"regalia"|"safari"|"salami"|"sari"|"saturnalia"|"spaghetti"|"specimen"|"subtopia"|"suburbia"|"syphilis"|"taxi"|"toccata"|"trellis"|"tutti"|"umbrella"|"utopia"|"villa"|"zucchini") { return(cnull_stem()); }
18291829
<noun,any>("acumen"|"Afrikaans"|"aphis"|"brethren"|"caries"|"confetti"|"contretemps"|"dais"|"debris"|"extremis"|"gallows"|"hors"|"hovis"|"hustings"|"innards"|"isosceles"|"maquis"|"minutiae"|"molasses"|"mortis"|"patois"|"pectoris"|"plumbites"|"series"|"tares"|"tennis"|"turps") { return(xnull_stem()); }
18301830
/* not included: seconds, as the time usage is much more common than the food usage, and second might work for food anyway */
1831-
<noun,any>("accoutrements"|"aerodynamics"|"aeronautics"|"aesthetics"|"algae"|"amends"|"annals"|"arrears"|"assizes"|"auspices"|"backwoods"|"bacteria"|"banns"|"battlements"|"bedclothes"|"belongings"|"billiards"|"binoculars"|"bitters"|"blandishments"|"bleachers"|"blinkers"|"blues"|"breeches"|"brussels"|"clothes"|"clutches"|"commons"|"confines"|"contents"|"credentials"|"crossbones"|"damages"|"dealings"|"dentures"|"depths"|"devotions"|"diggings"|"doings"|"downs"|"dues"|"dynamics"|"earnings"|"eatables"|"eaves"|"economics"|"electrodynamics"|"electronics"|"entrails"|"environs"|"equities"|"ethics"|"eugenics"|"filings"|"finances"|"folks"|"footlights"|"fumes"|"furnishings"|"genitals"|"glitterati"|"goggles"|"goods"|"grits"|"groceries"|"grounds"|"handcuffs"|"headquarters"|"histrionics"|"hostilities"|"humanities"|"hydraulics"|"hysterics"|"illuminations"|"italics"|"jeans"|"jitters"|"kinetics"|"knickers"|"latitudes"|"leggings"|"likes"|"linguistics"|"lodgings"|"loggerheads"|"mains"|"manners"|"mathematics"|"means"|"measles"|"media"|"memoirs"|"metaphysics"|"mockers"|"motions"|"multimedia"|"munitions"|"news"|"nutria"|"nylons"|"oats"|"odds"|"oils"|"oilskins"|"optics"|"orthodontics"|"outskirts"|"overalls"|"pants"|"pantaloons"|"paras"|"paratroops"|"particulars"|"pediatrics"|"phonemics"|"phonetics"|"physics"|"pincers"|"plastics"|"politics"|"proceeds"|"proceedings"|"prospects"|"pyjamas"|"rations"|"ravages"|"refreshments"|"regards"|"reinforcements"|"remains"|"respects"|"returns"|"riches"|"savings"|"scissors"|"semantics"|"shades"|"shallows"|"shambles"|"shorts"|"singles"|"slacks"|"specifics"|"spectacles"|"spoils"|"statics"|"statistics"|"summons"|"supplies"|"surroundings"|"suspenders"|"takings"|"teens"|"telecommunications"|"tenterhooks"|"thanks"|"theatricals"|"thermodynamics"|"tights"|"toils"|"trappings"|"travels"|"troops"|"tropics"|"trousers"|"tweeds"|"underpants"|"vapours"|"vicissitudes"|"vitals"|"wages"|"wanderings"|"wares"|"whereabouts"|"whites"|"winnings"|"withers"|"woollens"|"workings"|"writings"|"yes") { return(xnull_stem()); }
1831+
<noun,any>("accoutrements"|"aerodynamics"|"aeronautics"|"aesthetics"|"algae"|"amends"|"annals"|"arrears"|"assizes"|"auspices"|"backwoods"|"bacteria"|"banns"|"battlements"|"bedclothes"|"belongings"|"billiards"|"binoculars"|"bitters"|"blandishments"|"bleachers"|"blinkers"|"blues"|"breeches"|"brussels"|"clothes"|"clutches"|"commons"|"confines"|"contents"|"credentials"|"crossbones"|"damages"|"dealings"|"dentures"|"depths"|"devotions"|"diggings"|"doings"|"downs"|"dues"|"dynamics"|"earnings"|"eatables"|"eaves"|"economics"|"electrodynamics"|"electronics"|"entrails"|"environs"|"equities"|"ethics"|"eugenics"|"filings"|"finances"|"folks"|"footlights"|"fumes"|"furnishings"|"genitals"|"glitterati"|"goggles"|"goods"|"grits"|"groceries"|"grounds"|"handcuffs"|"headquarters"|"histrionics"|"hostilities"|"humanities"|"hydraulics"|"hysterics"|"illuminations"|"italics"|"jeans"|"jitters"|"kinetics"|"knickers"|"latitudes"|"leggings"|"likes"|"linguistics"|"lodgings"|"loggerheads"|"mains"|"manners"|"mathematics"|"means"|"measles"|"media"|"memoirs"|"metaphysics"|"mockers"|"motions"|"multimedia"|"munitions"|"news"|"nutria"|"nylons"|"oats"|"odds"|"oils"|"oilskins"|"optics"|"orthodontics"|"outskirts"|"overalls"|"pants"|"pantaloons"|"paras"|"paratroops"|"particulars"|"pediatrics"|"phonemics"|"phonetics"|"physics"|"pincers"|"plastics"|"politics"|"proceeds"|"proceedings"|"prospects"|"pyjamas"|"rations"|"ravages"|"refreshments"|"regards"|"reinforcements"|"remains"|"respects"|"returns"|"riches"|"savings"|"scissors"|"semantics"|"shades"|"shallows"|"shambles"|"shorts"|"singles"|"slacks"|"specifics"|"spectacles"|"spoils"|"statics"|"statistics"|"summons"|"surroundings"|"suspenders"|"takings"|"teens"|"telecommunications"|"tenterhooks"|"thanks"|"theatricals"|"thermodynamics"|"tights"|"toils"|"trappings"|"travels"|"troops"|"tropics"|"trousers"|"tweeds"|"underpants"|"vapours"|"vicissitudes"|"vitals"|"wages"|"wanderings"|"wares"|"whereabouts"|"whites"|"winnings"|"withers"|"woollens"|"workings"|"writings"|"yes") { return(xnull_stem()); }
18321832
<noun,any>("boati"|"bonhomi"|"clippi"|"creepi"|"deari"|"droppi"|"gendarmeri"|"girli"|"goali"|"haddi"|"kooki"|"kyri"|"lambi"|"lassi"|"mari"|"menageri"|"petti"|"reveri"|"snotti"|"sweeti")"es" { return(stem(1,"","s")); }
18331833
<verb,any>("buffet"|"plummet")"t"{EDING} { return(semi_reg_stem(1,"")); }
18341834
<verb,any>"gunsling" { return(cnull_stem()); }
@@ -2102,6 +2102,7 @@ S_ENDING_DEMONYMS = "Afghan"|"Afghani"|"African"|"Albanian"|"Alexandrine"|"Alger
21022102
<scan>"ppl"/_NNS { return(stem(3, "person", "")); }
21032103

21042104
<scan>"Olympics"/_NN(P?)(S?) { return(capitalise(xnull_stem())); }
2105+
/* TODO: add anti-X as an adjective */
21052106
<scan>{SINGULAR_DEMONYMS}/_(NN(P?)(S?)|JJ) { return(capitalise(xnull_stem())); }
21062107
<scan>{S_ENDING_DEMONYMS}/_(NN(P?)(S?)|JJ) { return(capitalise(common_noun_stem())); }
21072108
<scan>{S_ENDING_DEMONYMS}s/_(NN(P?)(S?)|JJ) { return(capitalise(stem(1, "", "s"))); }

0 commit comments

Comments
 (0)