Skip to content

Commit ef90223

Browse files
authored
Merge pull request #1156 from stanfordnlp/dev
Update for 4.2.1 release
2 parents 16ac6de + f674c96 commit ef90223

File tree

193 files changed

+11168
-2176
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

193 files changed

+11168
-2176
lines changed

.classpath

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
<classpathentry exported="true" kind="lib" path="lib/junit-4.13.1.jar" sourcepath="libsrc/junit-4.13.1-sources.jar"/>
1616
<classpathentry exported="true" kind="lib" path="lib/javax.servlet.jar" sourcepath="libsrc/tomcat/apache-tomcat-7.0.12-src.zip"/>
1717
<classpathentry exported="true" kind="lib" path="lib/AppleJavaExtensions.jar"/>
18-
<classpathentry exported="true" kind="lib" path="lib/jflex-1.8.2.jar" sourcepath="libsrc/jflex-1.8.2-src.zip"/>
18+
<classpathentry exported="true" kind="lib" path="lib/jflex-full-1.8.2.jar" sourcepath="libsrc/jflex-1.8.2.zip"/>
1919
<classpathentry exported="true" kind="lib" path="lib/xom-1.3.2.jar" sourcepath="libsrc/xom-1.3.2-sources.jar"/>
2020
<classpathentry exported="true" kind="lib" path="lib/joda-time.jar" sourcepath="libsrc/joda-time-2.10.5-sources.jar"/>
2121
<classpathentry exported="true" kind="lib" path="lib/jollyday-0.4.9.jar" sourcepath="libsrc/jollyday-0.4.9-sources.jar"/>
@@ -32,16 +32,17 @@
3232
<classpathentry exported="true" kind="lib" path="lib/slf4j-simple.jar" sourcepath="libsrc/slf4j-simple-1.7.12-sources.jar"/>
3333
<classpathentry exported="true" kind="lib" path="lib/javax.activation-api-1.2.0.jar" sourcepath="libsrc/javax.activation-api-1.2.0-sources.jar"/>
3434
<classpathentry exported="true" kind="lib" path="lib/jaxb-api-2.4.0-b180830.0359.jar" sourcepath="libsrc/jaxb-api-2.4.0-b180830.0359-sources.jar"/>
35-
<classpathentry exported="true" kind="lib" path="lib/jaxb-core-2.3.0.1.jar" sourcepath="libsrc/jaxb-core-2.3.0.1-sources.jar"/>
35+
<classpathentry exported="true" kind="lib" path="lib/istack-commons-runtime-3.0.7.jar" sourcepath="libsrc/istack-commons-runtime-3.0.7-sources.jar"/>
3636
<classpathentry exported="true" kind="lib" path="lib/jaxb-impl-2.4.0-b180830.0438.jar" sourcepath="libsrc/jaxb-impl-2.4.0-b180830.0438-sources.jar"/>
3737
<classpathentry exported="true" kind="lib" path="lib/ejml-core-0.39.jar" sourcepath="libsrc/projects/core/libsrc/ejml-core-0.39-sources.jar"/>
3838
<classpathentry exported="true" kind="lib" path="lib/ejml-ddense-0.39.jar" sourcepath="libsrc/projects/core/libsrc/ejml-ddense-0.39-sources.jar"/>
3939
<classpathentry exported="true" kind="lib" path="lib/ejml-simple-0.39.jar" sourcepath="libsrc/projects/core/libsrc/ejml-simple-0.39-sources.jar"/>
40+
<classpathentry exported="true" kind="lib" path="lib/json-simple.jar" />
4041
<classpathentry kind="lib" path="liblocal/antlr-runtime.jar"/>
41-
<classpathentry kind="lib" path="liblocal/hamcrest-2.2.jar " sourcepath="libsrc/hamcrest-2.2-sources.jar"/>
42+
<classpathentry kind="lib" path="liblocal/hamcrest-2.2.jar" sourcepath="libsrc/hamcrest-2.2-sources.jar"/>
4243
<classpathentry kind="lib" path="liblocal/javaruntype.jar"/>
43-
- <classpathentry kind="lib" path="liblocal/junit-quickcheck-core-0.4.jar" sourcepath="libsrc/junit-quickcheck-core-0.4-sources.jar"/>
44-
- <classpathentry kind="lib" path="liblocal/junit-quickcheck-generators-0.4.jar" sourcepath="libsrc/junit-quickcheck-generators-0.4-sources.jar"/>
44+
<classpathentry kind="lib" path="liblocal/junit-quickcheck-core-0.4.jar" sourcepath="libsrc/junit-quickcheck-core-0.4-sources.jar"/>
45+
<classpathentry kind="lib" path="liblocal/junit-quickcheck-generators-0.4.jar" sourcepath="libsrc/junit-quickcheck-generators-0.4-sources.jar"/>
4546
<classpathentry kind="lib" path="liblocal/junit-theories.jar"/>
4647
<classpathentry kind="lib" path="liblocal/ognl.jar"/>
4748
<classpathentry kind="output" path="classes"/>

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,7 @@ classes/**
1515
.idea/**
1616

1717
# emacs
18-
*~
18+
*~
19+
20+
# generated javadoc
21+
javadoc/**

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ sourceCompatibility = 1.8
1111
targetCompatibility = 1.8
1212
compileJava.options.encoding = 'UTF-8'
1313

14-
version = '4.2.0'
14+
version = '4.2.1'
1515

1616
// Gradle application plugin
1717
mainClassName = "edu.stanford.nlp.pipeline.StanfordCoreNLP"

build.xml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
<property name="data.path" value="${basedir}/data" />
99
<property name="tests.path" value="${basedir}/test/src" />
1010
<property name="itests.path" value="${basedir}/itest/src" />
11+
<property name="javadoc.path" value="${basedir}/javadoc"/>
1112

1213
<!-- included tasks may use project.core as a property name -->
1314
<property name="project.core" value="${basedir}" />
@@ -46,6 +47,45 @@
4647
<delete dir="${build.path}"/>
4748
</target>
4849

50+
<!-- ==================== Javadoc Target ================================ -->
51+
52+
<target name="javadoc" depends="compile"
53+
description="Create Javadoc API documentation">
54+
55+
<mkdir dir="${javadoc.path}"/>
56+
<javadoc destdir="${javadoc.path}"
57+
maxmemory="1200m"
58+
author="true"
59+
source="1.8"
60+
Overview="${basedir}/src/edu/stanford/nlp/overview.html"
61+
Doctitle="Stanford JavaNLP API Documentation"
62+
Windowtitle="Stanford JavaNLP API"
63+
Encoding="UTF-8"
64+
packagenames="*">
65+
<!-- Allow @generated, @modifiable and @ordered tags -->
66+
<tag name="generated" description="Generated" />
67+
<tag name="modifiable" description="Modifiable" />
68+
<tag name="ordered" description="Ordered" />
69+
70+
<!-- Point Javadoc to source directory -->
71+
<sourcepath>
72+
<pathelement path="${source.path}" />
73+
</sourcepath>
74+
75+
<!-- Depends on all libs and classes folder -->
76+
<classpath>
77+
<pathelement path="${build.path}" />
78+
<pathelement path="${basedir}/lib/*" />
79+
<pathelement path="${basedir}/liblocal/*" />
80+
<pathelement path="${java.class.path}"/>
81+
</classpath>
82+
83+
<bottom><![CDATA[<font size="2"><a href="https://nlp.stanford.edu" target="_top">Stanford NLP Group</a></font>]]></bottom>
84+
<link href="http://docs.oracle.com/javase/8/docs/api/"/>
85+
</javadoc>
86+
87+
</target>
88+
4989
<target name="javacceverything" depends="classpath"
5090
description="javacc everything that can be javacced">
5191
<taskdef resource="net/sf/antcontrib/antlib.xml">
@@ -397,6 +437,14 @@
397437
</jar>
398438
</target>
399439

440+
<target name="scenegraph.jar" depends="compile"
441+
description="build the semantic graph jar file">
442+
<jar destfile="scenegraph.jar">
443+
<fileset dir="${build.path}" includes="edu/stanford/nlp/scenegraph/**" />
444+
</jar>
445+
</target>
446+
447+
400448
<!-- I wrote this for a side project, so if you one day see it in a
401449
build file of mine, you can't sue me for taking it from
402450
Stanford. -jab -->

doc/corenlp/README.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@ LICENSE
4141
CHANGES
4242
---------------------------------
4343

44+
2021-05-05 4.2.1 Fix Turkish locale bug, QuoteAnnotator crash
45+
fixes, smaller srparser models, improvements
46+
to enhanced UD converter, Updated dependencies
47+
(istack, protobuf), batch processing of
48+
semgrex & enhancer requests when using stanza
49+
4450
2020-11-16 4.2.0 Bug fixes, Retrained English parser models
4551
with improved trees, Updated dependencies
4652
(ejml, junit, jflex), Speed up loading

doc/corenlp/pom-full.xml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<modelVersion>4.0.0</modelVersion>
33
<groupId>edu.stanford.nlp</groupId>
44
<artifactId>stanford-corenlp</artifactId>
5-
<version>4.2.0</version>
5+
<version>4.2.1</version>
66
<packaging>jar</packaging>
77
<name>Stanford CoreNLP</name>
88
<description>Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.</description>
@@ -14,8 +14,8 @@
1414
</license>
1515
</licenses>
1616
<scm>
17-
<url>https://nlp.stanford.edu/software/stanford-corenlp-4.2.0.zip</url>
18-
<connection>https://nlp.stanford.edu/software/stanford-corenlp-4.2.0.zip</connection>
17+
<url>https://nlp.stanford.edu/software/stanford-corenlp-4.2.1.zip</url>
18+
<connection>https://nlp.stanford.edu/software/stanford-corenlp-4.2.1.zip</connection>
1919
</scm>
2020
<developers>
2121
<developer>
@@ -195,7 +195,7 @@
195195
<configuration>
196196
<artifacts>
197197
<artifact>
198-
<file>${project.basedir}/stanford-corenlp-4.2.0-models.jar</file>
198+
<file>${project.basedir}/stanford-corenlp-4.2.1-models.jar</file>
199199
<type>jar</type>
200200
<classifier>models</classifier>
201201
</artifact>

doc/corenlp/pom-light.xml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<modelVersion>4.0.0</modelVersion>
33
<groupId>edu.stanford.nlp</groupId>
44
<artifactId>stanford-corenlp</artifactId>
5-
<version>4.2.0</version>
5+
<version>4.2.1</version>
66
<packaging>jar</packaging>
77
<name>Stanford CoreNLP</name>
88
<description>Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.</description>
@@ -14,8 +14,8 @@
1414
</license>
1515
</licenses>
1616
<scm>
17-
<url>https://nlp.stanford.edu/software/stanford-corenlp-4.2.0.zip</url>
18-
<connection>https://nlp.stanford.edu/software/stanford-corenlp-4.2.0.zip</connection>
17+
<url>https://nlp.stanford.edu/software/stanford-corenlp-4.2.1.zip</url>
18+
<connection>https://nlp.stanford.edu/software/stanford-corenlp-4.2.1.zip</connection>
1919
</scm>
2020
<developers>
2121
<developer>
@@ -56,7 +56,7 @@
5656
<configuration>
5757
<artifacts>
5858
<artifact>
59-
<file>${project.basedir}/stanford-corenlp-4.2.0-models.jar</file>
59+
<file>${project.basedir}/stanford-corenlp-4.2.1-models.jar</file>
6060
<type>jar</type>
6161
<classifier>models</classifier>
6262
</artifact>

doc/lexparser/README.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Stanford Lexicalized Parser v4.2.0 - 2020-11-17
1+
Stanford Lexicalized Parser v4.2.1 - 2021-05-05
22
-----------------------------------------------
33

44
Copyright (c) 2002-2020 The Board of Trustees of The Leland Stanford Junior
@@ -311,6 +311,8 @@ LICENSE
311311
CHANGES
312312
---------------------------------
313313

314+
2021-05-05 4.2.1 Reduce size of srparser models
315+
314316
2020-11-17 4.2.0 Retrain English models with treebank fixes
315317

316318
2020-05-22 4.0.0 Model tokenization updated to UDv2.0

doc/lexparser/pom.xml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<modelVersion>4.0.0</modelVersion>
33
<groupId>edu.stanford.nlp</groupId>
44
<artifactId>stanford-parser</artifactId>
5-
<version>4.2.0</version>
5+
<version>4.2.1</version>
66
<packaging>jar</packaging>
77
<name>Stanford Parser</name>
88
<description>Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.</description>
@@ -14,8 +14,8 @@
1414
</license>
1515
</licenses>
1616
<scm>
17-
<url>http://nlp.stanford.edu/software/stanford-parser-4.2.0.zip</url>
18-
<connection>http://nlp.stanford.edu/software/stanford-parser-4.2.0.zip</connection>
17+
<url>http://nlp.stanford.edu/software/stanford-parser-4.2.1.zip</url>
18+
<connection>http://nlp.stanford.edu/software/stanford-parser-4.2.1.zip</connection>
1919
</scm>
2020
<developers>
2121
<developer>
@@ -81,7 +81,7 @@
8181
<configuration>
8282
<artifacts>
8383
<artifact>
84-
<file>${project.basedir}/stanford-parser-4.2.0-models.jar</file>
84+
<file>${project.basedir}/stanford-parser-4.2.1-models.jar</file>
8585
<type>jar</type>
8686
<classifier>models</classifier>
8787
</artifact>

itest/src/edu/stanford/nlp/ie/crf/TrainCRFClassifierSlowITest.java

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package edu.stanford.nlp.ie.crf;
22

3+
import java.nio.file.Files;
4+
import java.nio.file.Path;
35
import java.io.File;
46
import java.util.List;
57
import java.util.Scanner;
@@ -9,38 +11,41 @@
911

1012
import edu.stanford.nlp.io.IOUtils;
1113
import edu.stanford.nlp.util.PropertiesUtils;
14+
import edu.stanford.nlp.util.logging.Redwood;
1215
import edu.stanford.nlp.util.logging.StanfordRedwoodConfiguration;
1316

1417

1518
public class TrainCRFClassifierSlowITest {
1619

17-
private static final String crfTrainingWorkingDir = "/u/scr/nlp/data/stanford-corenlp-testing/crf-classifier-training";
20+
private static final Redwood.RedwoodChannels log = Redwood.channels(TrainCRFClassifierSlowITest.class);
1821

1922

20-
@SuppressWarnings("ResultOfMethodCallIgnored")
2123
@Test
2224
public void testGermanCRFClassifierTraining() throws Exception {
25+
Path tempdir = Files.createTempDirectory("ner");
26+
tempdir.toFile().deleteOnExit();
27+
log.info("Temp directory: " + tempdir);
28+
2329
StanfordRedwoodConfiguration.apply(PropertiesUtils.asProperties(
24-
"log.file", crfTrainingWorkingDir + "/german-crf.results"));
25-
// delete the model if present
26-
File originalModelFile = new File(crfTrainingWorkingDir, "german.distsim.crf.ser.gz");
27-
originalModelFile.delete();
30+
"log.file", tempdir + "/german-crf.results"));
31+
2832
// train the new model
33+
// requires the german ner model in the classpath
2934
CRFClassifier.main(new String[] {
30-
"-props", "edu/stanford/nlp/models/ner/german.distsim.prop",
31-
"-serializeTo", crfTrainingWorkingDir+"/german.distsim.crf.ser.gz"
35+
"-props", "edu/stanford/nlp/models/ner/german.distsim.prop",
36+
"-serializeTo", tempdir + "/german.distsim.crf.ser.gz"
3237
});
33-
List<String> germanTrainingResults = IOUtils.linesFromFile(crfTrainingWorkingDir + "/german-crf.results");
38+
List<String> germanTrainingResults = IOUtils.linesFromFile(tempdir + "/german-crf.results");
3439
String lastLineOfResults = germanTrainingResults.get(germanTrainingResults.size() - 1);
3540
Scanner scanner = new Scanner(lastLineOfResults);
3641
// ignore word "Totals"
3742
scanner.next();
3843
double p = scanner.nextDouble();
39-
Assert.assertEquals("Precision outside target range", 0.8628, p, 0.001);
44+
Assert.assertEquals("Precision outside target range", 0.8628, p, 0.002);
4045
double r = scanner.nextDouble();
41-
Assert.assertEquals("Recall outside target range", 0.7406, r, 0.0025);
46+
Assert.assertEquals("Recall outside target range", 0.7406, r, 0.005);
4247
double f1 = scanner.nextDouble();
43-
Assert.assertEquals("F1 outside target range", 0.7969, f1, 0.001);
48+
Assert.assertEquals("F1 outside target range", 0.7969, f1, 0.002);
4449
}
4550

4651
// Previous results (Totals on CoNLL 2003 testa)

itest/src/edu/stanford/nlp/parser/server/LexicalizedParserServerITest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ public class LexicalizedParserServerITest {
3030

3131
static final String testString = "John Bauer works at Stanford.";
3232
static final String resultString = "(ROOT (S (NP (NNP John) (NNP Bauer)) (VP (VBZ works) (PP (IN at) (NP (NNP Stanford)))) (. .)))";
33-
static final String binarizedResultString = "(ROOT (S (NP (NNP John) (NNP Bauer)) (@S (VP (VBZ works) (PP (IN at) (NP (NNP Stanford)))) (. .))))";
33+
static final String binarizedResultString = "(ROOT (S (@S (NP (NNP John) (NNP Bauer)) (VP (VBZ works) (PP (IN at) (NP (NNP Stanford))))) (. .)))";
3434
static final String collapsedTreeStanfordDependenciesString =
3535
("nn(Bauer-2, John-1)\n" +
3636
"nsubj(works-3, Bauer-2)\n" +

itest/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParserITest.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,20 +63,25 @@ public void testBasicConstraint() {
6363
result = pq.getBestParse();
6464
assertEquals("(ROOT (S (NP (PRP It)) (VP (VBD was) (NP (NNP Carolina) (NNPS Reapers))) (. .)))", result.toString());
6565

66+
// Note that since the constraints are introducing brackets which
67+
// don't exist, we may get some weird parse results as models
68+
// change in the future. The important thing is that the ADJP
69+
// bracket appears for this test and the VP bracket appears for
70+
// the next test
6671
constraint = new ParserConstraint(2, 4, "ADJP");
6772
constraints = Collections.singletonList(constraint);
6873
pq = englishParser.parserQuery();
6974
pq.setConstraints(constraints);
7075
assertTrue(pq.parse(sentence));
7176
result = pq.getBestParse();
72-
assertEquals("(ROOT (S (NP (PRP It)) (VP (VBD was) (ADJP (NML (NP (NNP Carolina) (NNPS Reapers))))) (. .)))", result.toString());
77+
assertEquals("(ROOT (S (NP (PRP It)) (VP (VBD was) (ADJP (NP (NNP Carolina) (NNPS Reapers)))) (. .)))", result.toString());
7378

7479
constraint = new ParserConstraint(1, 3, "VP");
7580
constraints = Collections.singletonList(constraint);
7681
pq = englishParser.parserQuery();
7782
pq.setConstraints(constraints);
7883
assertTrue(pq.parse(sentence));
7984
result = pq.getBestParse();
80-
assertEquals("(ROOT (S (NP (PRP It)) (VP (VBD was) (NP (NNP Carolina))) (NP (NNPS Reapers)) (. .)))", result.toString());
85+
assertEquals("(ROOT (S (NP (PRP It)) (VP (VBD was) (ADJP (NNP Carolina))) (NP (NNPS Reapers)) (. .)))", result.toString());
8186
}
8287
}

0 commit comments

Comments
 (0)