Skip to content

Commit 56362e9

Browse files
committed
Add a bunch of demonyms
1 parent a23d075 commit 56362e9

File tree

2 files changed

+77024
-74066
lines changed

2 files changed

+77024
-74066
lines changed

src/edu/stanford/nlp/process/Morpha.flex

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,8 @@ EY_ADJS = "cag"|"cak"|"clay"|"cliqu"|"crep"|"dic"|"dop"|"glu"|"goo"|"grip"|"hok"
474474

475475
COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
476476

477-
SINGULAR_DEMONYMS = "Bantu"|"Bengalese"|"Beninese"|"British"|"Boche"|"Burmese"|"Chinese"|"Congolese"|"Danish"|"English"|"Finnish"|"Flemish"|"French"|"Gabonese"|"Guyanese"|"Irish"|"Japanese"|"Javanese"|"Jewish"|"Lebanese"|"Maltese"|"Moorish"|"Nepalese"|"Nipponese"|"Polish"|"Portuguese"|"Rhenish"|"Romish"|"Scottish"|"Senegalese"|"Siamese"|"Singhalese"|"Sinhalese"|"Sioux"|"Spanish"|"Sudanese"|"Swedish"|"Swiss"|"Taiwanese"|"Togolese"|"Turkish"|"Vietnamese"|"Welsh"|"Yiddish"
477+
SINGULAR_DEMONYMS = "Bantu"|"Bengalese"|"Beninese"|"Borg"|"British"|"Boche"|"Burmese"|"Chinese"|"Congolese"|"Danish"|"English"|"Ferengi"|"Finnish"|"Flemish"|"French"|"Gabonese"|"Guyanese"|"Irish"|"Japanese"|"Javanese"|"Jewish"|"Jóola"|"Lebanese"|"Maltese"|"Moorish"|"Nepalese"|"Nipponese"|"Polish"|"Portuguese"|"Rhenish"|"Romish"|"Scottish"|"Senegalese"|"Siamese"|"Singhalese"|"Sinhalese"|"Sioux"|"Spanish"|"Sudanese"|"Swedish"|"Swiss"|"Taiwanese"|"Togolese"|"Turkish"|"Vietnamese"|"Welsh"|"Yiddish"
478+
// Capitalised words (mostly demonyms) matched by the <scan> rules both bare and with a trailing "s".
S_ENDING_DEMONYMS = "Afghan"|"Afghani"|"African"|"Albanian"|"Alexandrine"|"Algerian"|"Alsatian"|"American"|"Amharic"|"Andorian"|"Andorran"|"Anglican"|"Angolan"|"Anguillan"|"Antiguan"|"Arab"|"Arabian"|"Arabic"|"Arcadian"|"Argentinian"|"Aristotelian"|"Aryan"|"Asian"|"Asiatic"|"Attic"|"Augustan"|"Australian"|"Austrian"|"Ba\'athist"|"Baathist"|"Baghdadi"|"Bahamiam"|"Bahamian"|"Bangladeshi"|"Barbadian"|"Belgian"|"Bengali"|"Bermudan"|"Bolian"|"Bolivian"|"Brazilian"|"Britannic"|"Bruneian"|"Bulgarian"|"Burundian"|"Caesarian"|"Californian"|"Cambodian"|"Cameroonian"|"Canadian"|"Catalan"|"Caucasian"|"Celtic"|"Cesarean"|"Chadian"|"Chilean"|"Christian"|"Colombian"|"Confucian"|"Copernican"|"Coptic"|"Corinthian"|"Cuban"|"Cyclopean"|"Cyprian"|"Cyrillic"|"Czech"|"Czechoslovakian"|"Delphic"|"Democrat"|"Djiboutian"|"Dominican"|"Doric"|"Ecuadorian"|"Egyptian"|"Elizabethan"|"Elysian"|"Eritrean"|"Ethiopian"|"Euclidean"|"Eurasian"|"European"|"Eustachian"|"Evangelical"|"Fabian"|"Fallopian"|"Fijian"|"Filipino"|"Firang"|"Franciscan"|"Freudian"|"Gaelic"|"Gallic"|"Gambian"|"Georgian"|"German"|"Germanic"|"Ghanaian"|"Gibraltarian"|"Gilbertian"|"Gordian"|"Gothic"|"Grecian"|"Greek"|"Gregorian"|"Grenadian"|"Guatemalan"|"Guinean"|"Haitian"|"Hebraic"|"Hellenic"|"Hertzian"|"Hippocratic"|"Hispanic"|"Homeric"|"Honduran"|"Hungarian"|"Husker"|"Icelandic"|"Indian"|"Indonesian"|"Ionic"|"Iranian"|"Iraqi"|"Islamic"|"Israeli"|"Italian"|"Jacobean"|"Jamaican"|"Jordanian"|"Judaic"|"Julian"|"Kampuchean"|"Kenyan"|"Korean"|"Kurd"|"Laotian"|"Liberian"|"Libertarian"|"Libyan"|"Lilliputian"|"Lutheran"|"Macedonian"|"Madagascan"|"Malawian"|"Malayan"|"Malaysian"|"Malian"|"Malthusian"|"Martian"|"Masonic"|"Mauritanian"|"Mauritian"|"Mediterranean"|"Mendelian"|"Mephistophelian"|"Mexican"|"Mohammedan"|"Mongolian"|"Montserratian"|"Moroccan"|"Mosaic"|"Moslem"|"Mozambican"|"Muhammadan"|"Muslim"|"Namibian"|"Napoleonic"|"Nauruan"|"Nazi"|"Neapolitan"|"Newtonian"|"Nicaraguan"|"Nigerian"|"Nilotic"|"Nordic"|"Norman"|"Norwegian"|"Olympian"|"Olympic"|"Oxonian"|"Pakistani"|"Palestinian"|"Panamanian"|"Papuan"|"Paraguayan"|"Parisian"|"Parmesan"|"Parthian"|"Pashtun"|"Persian"|"Peruvian"|"Platonic"|"Presbyterian"|"Protestant"|"Prussian"|"Punic"|"Pyrrhic"|"Rabelaisian"|"Ramadan"|"Republican"|"Romaic"|"Roman"|"Romanian"|"Rotarian"|"Ruritanian"|"Russian"|"Rwandan"|"Sabahan"|"Saddamite"|"Sadrist"|"Salafi"|"Salvadorean"|"Samaritan"|"Samoan"|"Sapphic"|"Sarawakian"|"Satanic"|"Saudi"|"Scandinavian"|"Semitic"|"Shakespearian"|"Shavian"|"Shiite"|"Siberian"|"Sicilian"|"Singaporean"|"Sinhala"|"Slavonic"|"Slovenian"|"Socratic"|"Somali"|"Somalian"|"Spartan"|"Stygian"|"Sumatran"|"Sunni"|"Syrian"|"Tahitian"|"Tamil"|"Tanzanian"|"Terpsichorean"|"Teutonic"|"Texan"|"Thai"|"Thespian"|"Tibetan"|"Tobagonian"|"Tongan"|"Trinidadian"|"Trojan"|"Tunisian"|"Ugandan"|"Unitarian"|"Uruguayan"|"Utopian"|"Vatican"|"Venetian"|"Venezuelan"|"Victorian"|"Vulcan"|"Wahhabi"|"Wesleyan"|"Westerner"|"Yugoslavian"|"Zairean"|"Zambian"|"Zimbabwean"|"Zionist"
478479

479480
%%
480481

@@ -2100,8 +2101,10 @@ SINGULAR_DEMONYMS = "Bantu"|"Bengalese"|"Beninese"|"British"|"Boche"|"Burmese"|"
21002101
<scan>{A}*"people"/_NNS { return(stem(5, "erson", "")); }
21012102
<scan>"ppl"/_NNS { return(stem(3, "person", "")); }
21022103

2103-
<scan>"Olympics"/_NN(P?)(S?) { return(capitalise(xnull_stem())); }
2104-
<scan>{SINGULAR_DEMONYMS}/_(NN(P?)(S?)|JJ) { return(capitalise(xnull_stem())); }
2104+
<scan>"Olympics"/_NN(P?)(S?) { return(capitalise(xnull_stem())); }
2105+
<scan>{SINGULAR_DEMONYMS}/_(NN(P?)(S?)|JJ) { return(capitalise(xnull_stem())); }
2106+
// An S_ENDING_DEMONYMS entry with no trailing "s", followed (as lookahead only) by a _NN, _NNP, _NNS, _NNPS or _JJ tag: the result of common_noun_stem() is passed through capitalise().
<scan>{S_ENDING_DEMONYMS}/_(NN(P?)(S?)|JJ) { return(capitalise(common_noun_stem())); }
2107+
// An S_ENDING_DEMONYMS entry with a literal "s" appended, under the same tag lookahead: calls stem(1, "", "s") and capitalises the result.
// NOTE(review): by analogy with the "people"/"ppl" rules, stem's first argument looks like the number of trailing characters to drop, so this presumably strips the final "s" -- confirm against stem().
<scan>{S_ENDING_DEMONYMS}s/_(NN(P?)(S?)|JJ) { return(capitalise(stem(1, "", "s"))); }
21052108

21062109
<scan>"worse"/_JJR { return(stem(5, "bad", "")); }
21072110
<scan>"worst"/_JJS { return(stem(5, "bad", "")); }

0 commit comments

Comments
 (0)