Skip to content

Commit 74fa642

Browse files
committed
Also lemmatize comp/sup adverbs as best as we can
1 parent 655de55 commit 74fa642

File tree

2 files changed

+70018
-71805
lines changed

2 files changed

+70018
-71805
lines changed

src/edu/stanford/nlp/process/Morpha.flex

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,7 @@ XX_ADJS = "awfull"|"badd"|"bigg"|"bumm"|"carefull"|"cheerfull"|"cruell"|"dimm"|"
472472
/* adjectives such as gooey which become gooier, gooiest */
473473
EY_ADJS = "cag"|"cak"|"clay"|"cliqu"|"crep"|"dic"|"dop"|"glu"|"goo"|"grip"|"hok"|"hom"|"hors"|"jok"|"lak"|"mop"|"shal"
474474

475+
/* POS tags accepted as trailing context by the comparative/superlative rules:
   the adjective tags JJR, JJS and the adverb tags RBR, RBS */
COMP_SUP = "JJR"|"JJS"|"RBR"|"RBS"
475476

476477
%%
477478

@@ -2097,16 +2098,22 @@ EY_ADJS = "cag"|"cak"|"clay"|"cliqu"|"crep"|"dic"|"dop"|"glu"|"goo"|"grip"|"hok"
20972098
/* irregular adverbs: "better" tagged RBR and "best" tagged RBS both call stem()
   with the full word length and "well" (presumably: strip the whole word and
   substitute "well" -- confirm against stem()'s definition) */
<scan>"better"/_RBR { return(stem(6, "well", "")); }
20982099
<scan>"best"/_RBS { return(stem(4, "well", "")); }
20992100

2100-
<scan>{E_ADJS}r/_JJ[RS] { return(stem(1, "", "")); }
2101-
<scan>{E_ADJS}st/_JJ[RS] { return(stem(2, "", "")); }
2102-
<scan>{XX_ADJS}er/_JJ[RS] { return(stem(3, "", "")); }
2103-
<scan>{XX_ADJS}est/_JJ[RS] { return(stem(4, "", "")); }
2104-
<scan>{EY_ADJS}ier/_JJ[RS] { return(stem(3, "ey", "")); }
2105-
<scan>{EY_ADJS}iest/_JJ[RS] { return(stem(4, "ey", "")); }
2106-
<scan>{G}+ier/_JJ[RS] { return(stem(3, "y", "")); }
2107-
<scan>{G}+iest/_JJ[RS] { return(stem(4, "y", "")); }
2108-
<scan>{G}+er/_JJ[RS] { return(stem(2, "", "")); }
2109-
<scan>{G}+est/_JJ[RS] { return(stem(3, "", "")); }
2101+
/* "further" tagged JJR (e.g. "further discussion") keeps the lemma "further", as it is lemmatized in GUM */
2102+
<scan>"further"/_JJR { return(stem(0, "", "")); }
2103+
/* "further"/"farther" tagged RBR (e.g. "extend further") is lemmatized to "far" */
2104+
<scan>"f"[au]"rther"/_RBR { return(stem(6, "ar", "")); }
2105+
<scan>"f"[au]"rthest"/_RBS { return(stem(7, "ar", "")); }
2106+
2107+
/* Regular comparative/superlative forms, matched only when followed by "_" and
   one of the COMP_SUP tags (JJR, JJS, RBR, RBS).  The more specific word-list
   rules (E_ADJS, XX_ADJS, EY_ADJS) come before the generic {G}+ rules.
   First stem() argument: presumably the number of trailing characters to strip;
   second: presumably the replacement suffix -- confirm against stem().
   NOTE(review): "farther" tagged JJR and "f[au]rthest" tagged JJS have no
   dedicated rule in this hunk, so they appear to reach the generic {G}+er /
   {G}+est rules below -- confirm whether that is handled elsewhere. */
<scan>{E_ADJS}r/_{COMP_SUP} { return(stem(1, "", "")); }
2108+
<scan>{E_ADJS}st/_{COMP_SUP} { return(stem(2, "", "")); }
2109+
<scan>{XX_ADJS}er/_{COMP_SUP} { return(stem(3, "", "")); }
2110+
<scan>{XX_ADJS}est/_{COMP_SUP} { return(stem(4, "", "")); }
2111+
<scan>{EY_ADJS}ier/_{COMP_SUP} { return(stem(3, "ey", "")); }
2112+
<scan>{EY_ADJS}iest/_{COMP_SUP} { return(stem(4, "ey", "")); }
2113+
<scan>{G}+ier/_{COMP_SUP} { return(stem(3, "y", "")); }
2114+
<scan>{G}+iest/_{COMP_SUP} { return(stem(4, "y", "")); }
2115+
<scan>{G}+er/_{COMP_SUP} { return(stem(2, "", "")); }
2116+
<scan>{G}+est/_{COMP_SUP} { return(stem(3, "", "")); }
21102117

21112118
/* tokens tagged _NN followed by any character other than "P": switch to the
   noun state, push the entire match back onto the input, and rescan via next() */
<scan>{G}+/_NN[^P] { yybegin(noun); yypushback(yylength()); return(next()); }
21122119
/* tokens tagged _NNP are delegated to proper_name_stem() */
<scan>{G}+/_NNP { return(proper_name_stem()); }

0 commit comments

Comments
 (0)