@@ -448,7 +448,7 @@ import edu.stanford.nlp.util.logging.Redwood;
448
448
449
449
450
450
451
/* Character-class macros. Whitespace inside [...] is significant in JFlex, so the classes are written without any. */
/* A is the class used by the {A}* / {A}+ prefixes of the noun and verb rules; since this change it also admits the curly apostrophe. */
- A = ['+a-zA-Z0-9]
451
+ A = ['’+a-zA-Z0-9]
452
452
V = [aeiouAEIOU]
453
453
VY = [aeiouyAEIOUY]
454
454
C = [bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ]
@@ -501,12 +501,12 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
501
501
/* Irregular forms of "do" and clitic auxiliaries; ['’] accepts a straight or a curly apostrophe. */
/* NOTE(review): stem(n, s, affix) appears to drop the last n matched characters and append s -- confirm against its definition. */
<verb,any> "did" { return (stem(3, "do", "ed")); }
502
502
<verb,any> "done" { return (stem(4, "do", "en")); }
503
503
<verb,any> "didst" { return (stem(5, "do", "ed")); } /* disprefer */
504
- <verb,any> "'ll" { return (stem(3, "will", "")); }
505
- <verb,any> "'m" { return (stem(2, "be", "")); } /* disprefer */
504
+ <verb,any> ['’]"ll" { return (stem(3, "will", "")); }
505
+ <verb,any> ['’]"m" { return (stem(2, "be", "")); } /* disprefer */
506
506
<verb,any> "m" { return (stem(1, "be", "")); } /* disprefer */
507
- <verb,any> "'re" { return (stem(3, "be", "")); } /* disprefer */
507
+ <verb,any> ['’]"re" { return (stem(3, "be", "")); } /* disprefer */
508
508
<verb,any> "r" { return (stem(1, "be", "")); } /* disprefer */
509
- <verb,any> "'ve" { return (stem(3, "have", "")); }
509
+ <verb,any> ['’]"ve" { return (stem(3, "have", "")); }
510
510
<verb,any> "ve" { return (stem(2, "have", "")); }
511
511
<verb,any> "v" { return (stem(1, "have", "")); }
512
512
<verb,any> "no" { return (stem(2, "know", "")); }
@@ -971,7 +971,7 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
971
971
/* Irregular verb forms "shrunken" .. "slept"; the "ski'd" rule takes either apostrophe form. */
<verb,any> "shrunken" { return (stem(5, "ink", "en")); } /* disprefer */
972
972
<verb,any> "sightsaw" { return (stem(3, "see", "ed")); }
973
973
<verb,any> "sightseen" { return (stem(3, "ee", "en")); }
974
- <verb,any> "ski'd" { return (stem(3, "i", "ed")); } /* en */
974
+ <verb,any> "ski"['’]"d" { return (stem(3, "i", "ed")); } /* en */
975
975
<verb,any> "skydove" { return (stem(3, "ive", "ed")); } /* en */
976
976
<verb,any> "slain" { return (stem(3, "ay", "en")); }
977
977
<verb,any> "slept" { return (stem(3, "eep", "ed")); } /* en */
@@ -1160,7 +1160,7 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
1160
1160
/* Nouns listed by stem whose plural only adds "s": the action drops the final character of the match. */
<noun,any> ( "Brahman" | "German" | "dragoman" | "ottoman" | "shaman" | "talisman" | "Norman" | "Pullman" | "Roman" ) "s" { return (stem(1, "", "s")); }
1161
1161
<noun,any> ( "Czech" | "diptych" | "Sassenach" | "abdomen" | "alibi" | "aria" | "bandit" | "begonia" | "bikini" | "caryatid" | "colon" | "cornucopia" | "cromlech" | "cupola" | "dryad" | "eisteddfod" | "encyclopaedia" | "epoch" | "eunuch" | "flotilla" | "gardenia" | "gestalt" | "gondola" | "hierarch" | "hose" | "impediment" | "koala" | "loch" | "mania" | "manservant" | "martini" | "matriarch" | "monarch" | "oligarch" | "omen" | "parabola" | "pastorale" | "patriarch" | "pea" | "peninsula" | "pfennig" | "phantasmagoria" | "pibroch" | "poly" | "real" | "safari" | "sari" | "specimen" | "standby" | "stomach" | "swami" | "taxi" | "tech" | "toccata" | "triptych" | "villa" | "yogi" | "zloty" ) "s" { return (stem(1, "", "s")); }
1162
1162
<noun,any> ( "asyl" | "sanct" | "rect" | "pl" | "pendul" | "mausole" | "hoodl" | "for" ) "ums" { return (stem(1, "", "s")); }
1163
- <noun,any>("Bantu"|"Bengalese"|"Beninese"|"Boche"|"Burmese"|"Chinese"|"Congolese"|"Gabonese"|"Guyanese"|"Japanese"|"Javanese"|"Lebanese"|"Maltese"|"Olympics"|"Portuguese"|"Senegalese"|"Siamese"|"Singhalese"|"Sinhalese"|"Sioux"|"Sudanese"|"Swiss"|"Taiwanese"|"Togolese"|"Vietnamese"|"aircraft"|"anopheles"|"apparatus"|"asparagus"|"barracks"|"bellows"|"bison"|"bluefish"|"bob"|"bourgeois"|"bream"|"brill"|"butterfingers"|"carp"|"catfish"|"chassis"|"chub"|"cod"|"codfish"|"coley"|"contretemps"|"corps"|"crawfish"|"crayfish"|"crossroads"|"cuttlefish"|"dace"|"dice"|"dogfish"|"doings"|"dory"|"downstairs"|"eldest"|"finnan"|"firstborn"|"fish"|"flatfish"|"flounder"|"fowl"|"fry"|"fries"|{A}+"-works"|"gasworks"|"glassworks"|"globefish"|"goldfish"|"grand"|"gudgeon"|"gulden"|"haddock"|"hake"|"halibut"|"headquarters"|"herring"|"hertz"|"horsepower"|"hovercraft"|"hundredweight"|"ironworks"|"jackanapes"|"kilohertz"|"kurus"|"kwacha"|"ling"|"lungfish"|"mackerel"|"means"|"megahertz"|"moorfowl"|"moorgame"|"mullet"|"offspring"|"pampas"|"parr"|"patois"|"pekinese"|"penn'orth"|"perch"|"pickerel"|"pike"|"pince-nez"|"plaice"|"precis"|"quid"|"rand"|"rendezvous"|"revers"|"roach"|"roux"|"salmon"|"samurai"|"series"|"shad"|"sheep"|"shellfish"|"smelt"|"spacecraft"|"species"|"starfish"|"stockfish"|"sunfish"|"superficies"|"sweepstakes"|"swordfish"|"tench"|"tope"|"triceps"|"trout"|"tuna"|"tunafish"|"tunny"|"turbot"|"undersigned"|"veg"|"waterfowl"|"waterworks"|"waxworks"|"whiting"|"wildfowl"|"woodworm"|"yen") { return(xnull_stem()); }
1163
/* Nouns passed to xnull_stem() as matched. "penn'orth" now takes either apostrophe form, like the other apostrophe rules in this change. */
+ <noun,any>("Bantu"|"Bengalese"|"Beninese"|"Boche"|"Burmese"|"Chinese"|"Congolese"|"Gabonese"|"Guyanese"|"Japanese"|"Javanese"|"Lebanese"|"Maltese"|"Olympics"|"Portuguese"|"Senegalese"|"Siamese"|"Singhalese"|"Sinhalese"|"Sioux"|"Sudanese"|"Swiss"|"Taiwanese"|"Togolese"|"Vietnamese"|"aircraft"|"anopheles"|"apparatus"|"asparagus"|"barracks"|"bellows"|"bison"|"bluefish"|"bob"|"bourgeois"|"bream"|"brill"|"butterfingers"|"carp"|"catfish"|"chassis"|"chub"|"cod"|"codfish"|"coley"|"contretemps"|"corps"|"crawfish"|"crayfish"|"crossroads"|"cuttlefish"|"dace"|"dice"|"dogfish"|"doings"|"dory"|"downstairs"|"eldest"|"feces"|"finnan"|"firstborn"|"fish"|"flatfish"|"flounder"|"fowl"|"fry"|"fries"|{A}+"-works"|"gasworks"|"glassworks"|"globefish"|"goldfish"|"grand"|"gudgeon"|"gulden"|"haddock"|"hake"|"halibut"|"headquarters"|"herring"|"hertz"|"horsepower"|"hovercraft"|"hundredweight"|"ironworks"|"jackanapes"|"kilohertz"|"kurus"|"kwacha"|"ling"|"lungfish"|"mackerel"|"means"|"megahertz"|"moorfowl"|"moorgame"|"mullet"|"offspring"|"pampas"|"parr"|"patois"|"pekinese"|"penn"['’]"orth"|"perch"|"pickerel"|"pike"|"pince-nez"|"plaice"|"precis"|"quid"|"rand"|"rendezvous"|"revers"|"roach"|"roux"|"salmon"|"samurai"|"series"|"shad"|"sheep"|"shellfish"|"smelt"|"spacecraft"|"species"|"starfish"|"stockfish"|"sunfish"|"superficies"|"sweepstakes"|"swordfish"|"tench"|"tope"|"triceps"|"trout"|"tuna"|"tunafish"|"tunny"|"turbot"|"undersigned"|"veg"|"waterfowl"|"waterworks"|"waxworks"|"whiting"|"wildfowl"|"woodworm"|"yen") { return(xnull_stem()); }
1164
1164
/* Nouns whose stem() call replaces the final character with the second argument. */
<noun,any> "Aries" { return (stem(1, "s", "s")); }
1165
1165
<noun,any> "Pisces" { return (stem(1, "s", "s")); }
1166
1166
<noun,any> "Bengali" { return (stem(1, "i", "s")); }
@@ -1250,7 +1250,7 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
1250
1250
/* Individually listed noun plurals; the "K's" rule takes either apostrophe form. */
<noun,any> "clutches" { return (stem(2, "", "s")); }
1251
1251
<noun,any> "continua" { return (stem(1, "um", "s")); }
1252
1252
<noun,any> "diggings" { return (stem(1, "", "s")); }
1253
- <noun,any> "K's" { return (stem(2, "", "s")); }
1253
+ <noun,any> "K"['’]"s" { return (stem(2, "", "s")); }
1254
1254
<noun,any> "seychellois" { return (stem(1, "s", "s")); }
1255
1255
<noun,any> "afterlives" { return (stem(3, "fe", "s")); }
1256
1256
<noun,any> "avens" { return (stem(1, "s", "s")); }
@@ -1469,7 +1469,7 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
1469
1469
/* Irregular noun plurals "maxima" .. "mesdemoiselles"; "men-o'-war" takes either apostrophe form and is mapped to "man-of-war". */
<noun,any> "maxima" { return (stem(2, "mum", "s")); }
1470
1470
<noun,any> "memoranda" { return (stem(2, "dum", "s")); }
1471
1471
<noun,any> "men-at-arms" { return (stem(10, "an-at-arms", "s")); }
1472
- <noun,any> "men-o'-war" { return (stem(9, "an-of-war", "s")); } /* disprefer */
1472
+ <noun,any> "men-o"['’]"-war" { return (stem(9, "an-of-war", "s")); } /* disprefer */
1473
1473
<noun,any> "men-of-war" { return (stem(9, "an-of-war", "s")); }
1474
1474
<noun,any> "menservants" { return (stem(10, "anservant", "s")); } /* disprefer */
1475
1475
<noun,any> "mesdemoiselles" { return (stem(13, "ademoiselle", "s")); }
@@ -1874,11 +1874,11 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
1874
1874
/* -o / -oe */
1875
1875
1876
1876
/* Words in -o / -oe, including apostrophe spellings such as "ko'd"; ['’] accepts a straight or a curly apostrophe. */
<verb,any> ( "bastinado" | "bunco" | "bunko" | "carbonado" | "contango" | "crescendo" | "ditto" | "echo" | "embargo" | "fresco" | "hallo" | "halo" | "lasso" | "niello" | "radio" | "solo" | "stiletto" | "stucco" | "tally-ho" | "tango" | "torpedo" | "veto" | "zero" ) "ed" { return (stem(2, "", "ed")); } /* en */
1877
- <verb,any> "ko'd" { return (stem(3, "o", "ed")); } /* en */
1878
- <verb,any> "ko'ing" { return (stem(4, "", "ing")); }
1879
- <verb,any> "ko's" { return (stem(2, "", "s")); }
1880
- <verb,any> "tally-ho'd" { return (stem(3, "", "ed")); } /* en */ /* disprefer */
1881
- <noun,any> ( "co" | "do" | "ko" | "no" ) "'s" { return (stem(2, "", "s")); }
1877
+ <verb,any> "ko"['’]"d" { return (stem(3, "o", "ed")); } /* en */
1878
+ <verb,any> "ko"['’]"ing" { return (stem(4, "", "ing")); }
1879
+ <verb,any> "ko"['’]"s" { return (stem(2, "", "s")); }
1880
/* NOTE(review): count lowered from 3 to 2. Removing three characters and adding none would leave "tally-h", whereas the "ko'd" rule above removes three and re-adds "o". Assumes stem(n, ...) strips the last n characters -- confirm. */
+ <verb,any> "tally-ho"['’]"d" { return (stem(2, "", "ed")); } /* en */ /* disprefer */
1881
+ <noun,any> ( "co" | "do" | "ko" | "no" ) ['’]"s" { return (stem(2, "", "s")); }
1882
1882
1883
1883
<noun,any> ( "aloe" | "archfoe" | "canoe" | "doe" | "felloe" | "floe" | "foe" | "hammertoe" | "hoe" | "icefloe" | "mistletoe" | "oboe" | "roe" |( {A} * "shoe" )| "sloe" | "throe" | "tiptoe" | "toe" | "voe" | "woe" ) "s" { return (stem(1, "", "s")); }
1884
1884
<verb,any> ( "canoe" | "hoe" | "outwoe" | "rehoe" |( {A} * "shoe" )| "tiptoe" | "toe" ) "s" { return (stem(1, "", "s")); }
@@ -1919,7 +1919,7 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
1919
1919
/* Dotted abbreviations and the generic "-men" plural; the ".'s." rule takes either apostrophe form. */
<noun,any> "m.p.s." { return (stem(6, "m.p.", "s")); }
1920
1920
<noun,any> ( "cons" | "miss" | "mrs" | "ms" | "n-s" | "pres" | "ss" ) "." { return (cnull_stem()); }
1921
1921
<noun,any> ( {A} | "." )+ ".s." { return (cnull_stem()); }
1922
- <noun,any> ( {A} | "." )+ ".'s." { return (stem(4, ".", "s")); } /* disprefer */
1922
+ <noun,any> ( {A} | "." )+ "."['’]"s." { return (stem(4, ".", "s")); } /* disprefer */
1923
1923
<noun,any> ( {A} | "." )+ "s." { return (stem(2, ".", "s")); }
1924
1924
1925
1925
<noun,any> {A} * "men" { return (stem(2, "an", "s")); }
@@ -2042,19 +2042,19 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
2042
2042
/* <scan> rules: each form is matched only when followed by the "_TAG" trailing context; ['’] accepts a straight or a curly apostrophe. */
/* The "$" / POS rules emit the lemma "'s" with a straight apostrophe whichever form was matched. */
<scan> "is" / _VBZ { return (stem(2, "be", "s")); }
2043
2043
<scan> "du" / _VBP { return (stem(2, "do", "")); } /* In dunno */
2044
2044
<scan> "no" / _VB { return (stem(2, "know", "")); } /* In dunno */
2045
- <scan> "'d" / _VH { return (stem(2, "have", "ed")); } /* disprefer */
2046
- <scan> "'d" / _VBD { return (stem(2, "have", "ed")); } /* disprefer */
2047
- <scan> "'d" / _VM { return (stem(2, "would", "")); }
2048
- <scan> "'d" / _MD { return (stem(2, "would", "")); }
2045
+ <scan> ['’]"d" / _VH { return (stem(2, "have", "ed")); } /* disprefer */
2046
+ <scan> ['’]"d" / _VBD { return (stem(2, "have", "ed")); } /* disprefer */
2047
+ <scan> ['’]"d" / _VM { return (stem(2, "would", "")); }
2048
+ <scan> ['’]"d" / _MD { return (stem(2, "would", "")); }
2049
2049
<scan> "d" / _MD { return (stem(1, "would", "")); }
2050
- <scan> "'s" / _VBZ { return (stem(2, "be", "s")); } /* disprefer */ /* could really be have */
2050
+ <scan> ['’]"s" / _VBZ { return (stem(2, "be", "s")); } /* disprefer */ /* could really be have */
2051
2051
<scan> "s" / _VBZ { return (stem(1, "be", "s")); } /* disprefer */ /* could really be have */
2052
- <scan> "'s" / _VDZ { return (stem(2, "do", "s")); } /* disprefer */
2053
- <scan> "'s" / _VHZ { return (stem(2, "have", "s")); } /* disprefer */
2054
- <scan> "'s" / _"$" { return (stem(2, "'s", "")); }
2055
- <scan> "'s" / _POS { return (stem(2, "'s", "")); }
2056
- <scan> "'s" / _CSA { return (stem(2, "as", "")); }
2057
- <scan> "'s" / _CJS { return (stem(2, "as", "")); }
2052
+ <scan> ['’]"s" / _VDZ { return (stem(2, "do", "s")); } /* disprefer */
2053
+ <scan> ['’]"s" / _VHZ { return (stem(2, "have", "s")); } /* disprefer */
2054
+ <scan> ['’]"s" / _"$" { return (stem(2, "'s", "")); }
2055
+ <scan> ['’]"s" / _POS { return (stem(2, "'s", "")); }
2056
+ <scan> ['’]"s" / _CSA { return (stem(2, "as", "")); }
2057
+ <scan> ['’]"s" / _CJS { return (stem(2, "as", "")); }
2058
2058
<scan> "not" / _XX { return (stem(3, "not", "")); }
2059
2059
<scan> "ai" / _VB { return (stem(2, "be", "")); } /* disprefer */
2060
2060
<scan> "ai" / _VH { return (stem(2, "have", "")); } /* disprefer */
@@ -2064,12 +2064,13 @@ COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
2064
2064
/* <scan> rules for modal clitics, negation and object pronouns. Negation is matched as "n't" (either apostrophe), as apostrophe-less "nt", and as bare "n". */
<scan> "sha" / _MD { return (stem(3, "shall", "")); }
2065
2065
<scan> "wo" / _VM { return (stem(2, "will", "")); } /* disprefer */
2066
2066
<scan> "wo" / _MD { return (stem(2, "will", "")); } /* disprefer */
2067
- <scan> "'ll" / _MD { return (stem(3, "will", "")); }
2067
+ <scan> ['’]"ll" / _MD { return (stem(3, "will", "")); }
2068
2068
<scan> "ll" / _MD { return (stem(2, "will", "")); }
2069
2069
<scan> "wilt" / _MD { return (stem(4, "will", "")); }
2070
- <scan> "n't" / _XX { return (stem(3, "not", "")); } /* disprefer */
2071
- <scan> "n't" / _RB { return (stem(3, "not", "")); } /* cdm add; disprefer */
2072
- <scan> "n" / _RB { return (stem(1, "not", "")); } /* cdm add; disprefer */
2070
+ <scan> "n"['’]"t" / _XX { return (stem(3, "not", "")); } /* disprefer */
2071
+ <scan> "n"['’]"t" / _RB { return (stem(3, "not", "")); } /* cdm add; disprefer */
2072
+ <scan> "nt" / _RB { return (stem(2, "not", "")); } /* luffa add; disprefer? */
2073
+ <scan> "n" / _RB { return (stem(1, "not", "")); } /* cdm add; disprefer */
2073
2074
<scan> "him" / _P { return (stem(3, "he", "")); }
2074
2075
<scan> "her" / _P { return (stem(3, "she", "")); }
2075
2076
<scan> "them" / _P { return (stem(1, "y", "")); }
0 commit comments