Skip to content

Commit 9e1cead

Browse files
authored
Merge pull request #1 from manzurola/0.1.0
0.1.0
2 parents 2a88890 + b121898 commit 9e1cead

22 files changed

+359
-289
lines changed

.github/workflows/maven.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# This workflow will build a Java project with Maven
2+
# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
3+
4+
name: build
5+
on:
6+
push:
7+
branches: [ main ]
8+
pull_request:
9+
branches: [ main ]
10+
11+
jobs:
12+
build:
13+
14+
runs-on: ubuntu-latest
15+
16+
steps:
17+
- uses: actions/checkout@v2
18+
- name: Set up JDK 11
19+
uses: actions/setup-java@v2
20+
with:
21+
java-version: '11'
22+
distribution: 'adopt'
23+
- name: Build with Maven
24+
run: mvn -s $GITHUB_WORKSPACE/.github/workflows/settings.xml -B package --file pom.xml
25+
env:
26+
GH_USERNAME: ${{ secrets.GH_USERNAME }}
27+
GH_PAT: ${{ secrets.GH_PAT }}

.github/workflows/settings.xml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
2+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3+
xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0
4+
http://maven.apache.org/xsd/settings-1.0.0.xsd">
5+
6+
<activeProfiles>
7+
<activeProfile>github</activeProfile>
8+
</activeProfiles>
9+
10+
<profiles>
11+
<profile>
12+
<id>github</id>
13+
<repositories>
14+
<repository>
15+
<id>central</id>
16+
<url>https://repo1.maven.org/maven2</url>
17+
</repository>
18+
<repository>
19+
<id>github</id>
20+
<url>https://maven.pkg.github.com/manzurola/*</url>
21+
<snapshots>
22+
<enabled>true</enabled>
23+
</snapshots>
24+
</repository>
25+
</repositories>
26+
</profile>
27+
</profiles>
28+
29+
<servers>
30+
<server>
31+
<id>github</id>
32+
<username>${env.GH_USERNAME}</username>
33+
<password>${env.GH_PAT}</password>
34+
</server>
35+
</servers>
36+
</settings>

README.md

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,84 @@
1-
# Errgent
1+
# Errgent - Grammatical Error Generation Toolkit 🤖
2+
3+
Errgent generates grammatically incorrect variants of a valid English sentence.
4+
5+
## How It Works
6+
7+
We first create variants of the document by inflecting each of its tokens. For English, we use SimpleNLG to create inflections in a brute-force manner.
8+
9+
We then process each inflected doc with Errant, which annotates each doc pair (inflected vs. original) for grammatical errors.
10+
11+
Finally, we filter each pair to match those which contain the specified grammatical error.
12+
13+
## Prerequisites
14+
15+
Before you begin, ensure you have met the following requirements:
16+
17+
* You have Java 11 installed.
18+
* You have access to the GitHub Packages Maven registry as described [here](https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-apache-maven-registry#authenticating-to-github-packages).
19+
* You have the necessary prerequisites for [Errant4J](https://github.com/manzurola/errant4j#prerequisits).
20+
21+
## Installing Errgent
22+
23+
Add this to the dependencies section of your `pom.xml`:
24+
```xml
25+
<dependency>
26+
<groupId>com.github.manzurola</groupId>
27+
<artifactId>errgent</artifactId>
28+
<version>0.1.0</version>
29+
</dependency>
30+
```
31+
32+
## Using Errgent
33+
34+
To use Errgent in code, follow these steps:
35+
36+
```java
37+
// Get a spaCy instance
38+
SpaCy spacy = SpaCy.create(CoreNLPAdapter.create());
39+
40+
// Create an English error annotator
41+
Annotator annotator = Errant.newAnnotator("en", spacy);
42+
43+
// Create an English error generator
44+
Generator generator = Errgent.newGenerator("en", annotator);
45+
46+
// Parse the doc (a utility method)
47+
Doc target = generator.parse("My friends like to have fun.");
48+
49+
// Generate all documents that contain the specified error
50+
// (will contain "My friends like to has fun.")
51+
List<Doc> inflections = generator.generate(target, REPLACEMENT_SUBJECT_VERB_AGREEMENT);
52+
for (Doc inflection : inflections) {
53+
System.out.println(inflection.text());
54+
}
55+
```
56+
57+
Errgent is currently available only for English.
58+
59+
## Contributions
60+
61+
To contribute to Errgent, follow these steps:
62+
63+
1. Fork this repository.
64+
2. Create a branch: `git checkout -b <branch_name>`.
65+
3. Make your changes and commit them: `git commit -m '<commit_message>'`
66+
4. Push your branch to your fork: `git push origin <branch_name>`
67+
5. Create the pull request.
68+
69+
Alternatively see the GitHub documentation on [creating a pull request](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request).
70+
71+
72+
## Contributors
73+
74+
Thanks to the following people who have contributed to this project:
75+
76+
* [@manzurola](https://github.com/manzurola) 🐈
77+
78+
## Contact
79+
80+
If you want to contact me, you can reach me at [guy.manzurola@gmail.com](mailto:guy.manzurola@gmail.com).
81+
82+
## License
83+
84+
This project uses the following license: [MIT](https://github.com/manzurola/errgent/blob/main/LICENSE).

pom.xml

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,25 @@
1515
<junit-jupiter.version>5.5.2</junit-jupiter.version>
1616
<logback.version>1.1.7</logback.version>
1717
<errant4j.version>0.3.0</errant4j.version>
18-
<spacy4j.version>0.1.0</spacy4j.version>
18+
<spacy4j.version>0.2.0</spacy4j.version>
1919
<simplenlg.version>4.5.0</simplenlg.version>
2020
</properties>
2121

2222
<dependencies>
2323
<dependency>
24-
<groupId>io.languagetoys</groupId>
24+
<groupId>com.github.manzurola</groupId>
25+
<artifactId>spacy4j-adapters-corenlp</artifactId>
26+
<version>${spacy4j.version}</version>
27+
<scope>test</scope>
28+
</dependency>
29+
<dependency>
30+
<groupId>com.github.manzurola</groupId>
31+
<artifactId>spacy4j-adapters-spacy-server</artifactId>
32+
<version>${spacy4j.version}</version>
33+
<scope>test</scope>
34+
</dependency>
35+
<dependency>
36+
<groupId>com.github.manzurola</groupId>
2537
<artifactId>errant4j</artifactId>
2638
<version>${errant4j.version}</version>
2739
</dependency>
@@ -35,18 +47,6 @@
3547
<artifactId>SimpleNLG</artifactId>
3648
<version>${simplenlg.version}</version>
3749
</dependency>
38-
<dependency>
39-
<groupId>io.languagetoys</groupId>
40-
<artifactId>spacy4j-adapters-corenlp</artifactId>
41-
<version>${spacy4j.version}</version>
42-
<scope>test</scope>
43-
</dependency>
44-
<dependency>
45-
<groupId>io.languagetoys</groupId>
46-
<artifactId>spacy4j-adapters-spacy-server</artifactId>
47-
<version>${spacy4j.version}</version>
48-
<scope>test</scope>
49-
</dependency>
5050
<dependency>
5151
<groupId>org.junit.jupiter</groupId>
5252
<artifactId>junit-jupiter</artifactId>

src/main/java/com/github/manzurola/errgent/core/Errgent.java

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
package com.github.manzurola.errgent.core;
22

3-
import com.github.manzurola.errgent.core.inflect.Inflector;
3+
import com.github.manzurola.errant4j.core.Annotator;
44
import com.github.manzurola.errgent.lang.en.inflector.EnInflector;
5-
import io.languagetoys.errant4j.core.Annotator;
65

76
import java.util.Map;
87
import java.util.function.Function;
98

10-
public class Errgent {
9+
public final class Errgent {
1110

1211
private static final Map<String, Function<Annotator, Generator>> generators;
1312

@@ -20,14 +19,6 @@ public class Errgent {
2019
private Errgent() {
2120
}
2221

23-
public static Generator enGenerator(Annotator annotator) {
24-
return newGenerator(annotator, new EnInflector());
25-
}
26-
27-
public static Generator newGenerator(Annotator annotator, Inflector inflector) {
28-
return new GeneratorImpl(annotator, inflector);
29-
}
30-
3122
public static Generator newGenerator(String language, Annotator annotator) {
3223
if (generators.containsKey(language)) {
3324
return generators.get(language).apply(annotator);
Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,30 @@
11
package com.github.manzurola.errgent.core;
22

3-
import com.github.manzurola.errgent.core.filter.InflectionFilter;
4-
import io.languagetoys.spacy4j.api.containers.Doc;
5-
import io.languagetoys.spacy4j.api.containers.Token;
3+
import com.github.manzurola.errant4j.core.GrammaticalError;
4+
import com.github.manzurola.spacy4j.api.containers.Doc;
65

76
import java.util.List;
87

8+
/**
9+
* Base interface for grammatical error generators.
10+
*/
911
public interface Generator {
1012

13+
/**
14+
* Utility method to apply NLP to a text using the underlying spacy instance.
15+
*
16+
* @param text the text to parse
17+
* @return a parsed Doc object
18+
*/
1119
Doc parse(String text);
1220

13-
List<Inflection> generate(List<Token> target, InflectionFilter filter);
21+
/**
22+
* Generate inflected docs with the specified grammatical error.
23+
*
24+
* @param target the target from which grammatically incorrect variants will be produced.
25+
* @return a list of {@link Doc} objects containing the specified grammatical error. Returns an empty list if no
26+
* matching errors could be produced.
27+
*/
28+
List<Doc> generate(Doc target, GrammaticalError error);
1429

1530
}
Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
package com.github.manzurola.errgent.core;
22

3-
import com.github.manzurola.errgent.core.filter.InflectionFilter;
4-
import com.github.manzurola.errgent.core.inflect.DocFactory;
3+
import com.github.manzurola.errant4j.core.Annotator;
4+
import com.github.manzurola.errant4j.core.GrammaticalError;
5+
import com.github.manzurola.errgent.core.inflect.Inflection;
6+
import com.github.manzurola.errgent.core.inflect.InflectionFactory;
57
import com.github.manzurola.errgent.core.inflect.Inflector;
6-
import io.languagetoys.errant4j.core.Annotation;
7-
import io.languagetoys.errant4j.core.Annotator;
8-
import io.languagetoys.spacy4j.api.containers.Doc;
9-
import io.languagetoys.spacy4j.api.containers.Token;
8+
import com.github.manzurola.spacy4j.api.containers.Doc;
109
import org.slf4j.Logger;
1110
import org.slf4j.LoggerFactory;
1211

1312
import java.util.List;
13+
import java.util.function.Predicate;
1414
import java.util.stream.Collectors;
1515

16-
public class GeneratorImpl implements Generator {
16+
public final class GeneratorImpl implements Generator {
1717
private final Logger logger = LoggerFactory.getLogger(this.getClass());
1818
private final Annotator annotator;
1919
private final Inflector inflector;
@@ -24,26 +24,26 @@ public GeneratorImpl(Annotator annotator, Inflector inflector) {
2424
}
2525

2626
@Override
27-
public Doc parse(String text) {
27+
public final Doc parse(String text) {
2828
return annotator.parse(text);
2929
}
3030

3131
@Override
32-
public List<Inflection> generate(List<Token> target, InflectionFilter filter) {
33-
DocFactory docFactory = new DocFactory(annotator);
32+
public List<Doc> generate(Doc target, GrammaticalError error) {
33+
InflectionFactory inflectionFactory = new InflectionFactory(annotator, target);
3434
return target
3535
.stream()
3636
.parallel()
37-
.flatMap(token -> inflector.inflect(token, docFactory))
38-
.map(inflectedDoc -> {
39-
List<Annotation> errors = annotator.annotate(inflectedDoc.tokens(), target)
40-
.stream()
41-
.filter(annotation -> !annotation.grammaticalError().isNone())
42-
.collect(Collectors.toList());
43-
return Inflection.of(inflectedDoc, errors);
44-
})
45-
.filter(filter::filter)
37+
.flatMap(token -> inflector.inflect(token, inflectionFactory))
38+
.filter(filter(List.of(error)))
39+
.map(Inflection::doc)
4640
.collect(Collectors.toList());
4741
}
4842

43+
private Predicate<Inflection> filter(List<GrammaticalError> errors) {
44+
return inflection -> inflection.errors()
45+
.stream()
46+
.anyMatch(annotation -> errors.contains(annotation.grammaticalError()));
47+
}
48+
4949
}

src/main/java/com/github/manzurola/errgent/core/filter/AllErrorsInflectionFilter.java

Lines changed: 0 additions & 23 deletions
This file was deleted.

src/main/java/com/github/manzurola/errgent/core/filter/AnyErrorInflectionFilter.java

Lines changed: 0 additions & 22 deletions
This file was deleted.

src/main/java/com/github/manzurola/errgent/core/filter/InflectionFilter.java

Lines changed: 0 additions & 19 deletions
This file was deleted.

0 commit comments

Comments
 (0)