Skip to content

Commit 48a0948

Browse files
committed
更新训练程序
2 parents 668183b + 8cf116d commit 48a0948

File tree

14 files changed

+89
-227
lines changed

14 files changed

+89
-227
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,11 @@ You can also use the following site to check the partial functionality.
4343

4444
[FNLP入门教程](https://github.com/xpqiu/fnlp/wiki)
4545

46-
大的模型文件和代码分离。相应的模型文件可以从release页面下载。
46+
除了源码文件,还需要下载FNLP模型文件。由于模型文件较大,不便于存放在源码库之中,请至[Release](https://github.com/xpqiu/fnlp/releases)页面下载,并将模型文件放在“models”目录。
47+
48+
* seg.m 分词模型
49+
* pos.m 词性标注模型
50+
* dep.m 依存句法分析模型
4751

4852
欢迎大家提供非Java语言的接口。
4953

fnlp-core/src/main/java/org/fnlp/ml/classifier/bayes/BayesClassifier.java

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,20 @@
1111
import java.io.ObjectInputStream;
1212
import java.io.ObjectOutputStream;
1313
import java.io.Serializable;
14-
import java.util.ArrayList;
1514
import java.util.Arrays;
1615
import java.util.zip.GZIPInputStream;
1716
import java.util.zip.GZIPOutputStream;
1817

1918
import org.fnlp.ml.classifier.AbstractClassifier;
20-
import org.fnlp.ml.classifier.LabelParser.Type;
21-
import org.fnlp.ml.classifier.linear.Linear;
2219
import org.fnlp.ml.classifier.LabelParser;
20+
import org.fnlp.ml.classifier.LabelParser.Type;
2321
import org.fnlp.ml.classifier.Predict;
24-
import org.fnlp.ml.classifier.TPredict;
2522
import org.fnlp.ml.feature.FeatureSelect;
2623
import org.fnlp.ml.types.Instance;
2724
import org.fnlp.ml.types.alphabet.AlphabetFactory;
2825
import org.fnlp.ml.types.sv.HashSparseVector;
2926
import org.fnlp.nlp.pipe.Pipe;
3027
import org.fnlp.util.exception.LoadModelException;
31-
import org.junit.Ignore;
3228
/**
3329
* 朴素贝叶斯分类器
3430
* @author sywu

fnlp-core/src/main/java/org/fnlp/nlp/corpus/StopWords.java

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -135,15 +135,35 @@ public List<String> phraseDel(String[] words){
135135
return list;
136136
}
137137

138-
Pattern noise = Pattern.compile(".*["+CharSets.allRegexPunc+"\\d]+.*");
139-
140-
public boolean isStopWord(String word) {
141-
if (word.length() == 1 || word.length()>4)
142-
return true;
143-
144-
if (noise.matcher(word).matches())
145-
return true;
146-
138+
Pattern noise = Pattern.compile(".*["+CharSets.allRegexPunc+"\\d]+.*");
139+
140+
/**
141+
* Decides whether {@code word} is treated as a stop word: true if its length lies outside [minLen, maxLen], if it matches the {@code noise} pattern (contains a digit or a character from CharSets.allRegexPunc), or if {@code sWord} contains it.
142+
* @param word the word to test
143+
* @param minLen minimum length; shorter words are reported as stop words
144+
* @param maxLen maximum length; longer words are reported as stop words
145+
* @return true if any of the three checks fires, false otherwise
146+
*/
147+
public boolean isStopWord(String word,int minLen, int maxLen) {
148+
if (word.length() < minLen || word.length()>maxLen) // length filter; both bounds are inclusive for accepted words
149+
return true;
150+
151+
if (noise.matcher(word).matches()) // whole-string match succeeds when any punctuation/digit character occurs
152+
return true;
153+
154+
if (sWord.contains(word)) // explicit stop-word lookup
155+
return true;
156+
157+
return false;
158+
}
159+
160+
/**
161+
* 判断是否为停用词
162+
* @param word
163+
* @return
164+
*/
165+
public boolean isStopWord(String word) {
166+
147167
if (sWord.contains(word))
148168
return true;
149169

fnlp-core/src/main/java/org/fnlp/nlp/similarity/train/WordCluster.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ public Cluster startClustering() {
543543
return null;
544544
}
545545
try {
546-
saveTxt("res-"+round);
546+
saveTxt("../tmp/res-"+round);
547547
} catch (Exception e) {
548548
// TODO Auto-generated catch block
549549
e.printStackTrace();

fnlp-core/src/test/java/org/fnlp/core/AppTest.java

Lines changed: 0 additions & 57 deletions
This file was deleted.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package org.fnlp.nlp.corpus;
2+
3+
import static org.junit.Assert.*;
4+
5+
import org.junit.AfterClass;
6+
import org.junit.BeforeClass;
7+
import org.junit.Test;
8+
9+
public class StopWordsTest { // JUnit 4 tests for both StopWords.isStopWord overloads
10+
11+
@BeforeClass
12+
public static void setUpBeforeClass() throws Exception { // empty: no class-level setup is performed
13+
}
14+
15+
@AfterClass
16+
public static void tearDownAfterClass() throws Exception { // empty: no class-level teardown is performed
17+
}
18+
19+
@Test
20+
public void testIsStopWordStringIntInt() { // exercises isStopWord(word, minLen, maxLen) with minLen=2, maxLen=4
21+
StopWords sw = new StopWords();
22+
sw.read("../models/stopwords/StopWords.txt"); // path is relative to the working directory
23+
assertTrue(!sw.isStopWord("现在我",2,4)); // length 3 is within [2,4] and there is no digit; expects the word not to be in the loaded list
24+
assertTrue(sw.isStopWord("我0",2,4)); // contains a digit, which the noise pattern rejects
25+
assertTrue(sw.isStopWord("我#",2,4)); // '#' is presumably covered by CharSets.allRegexPunc — TODO confirm
26+
assertTrue(sw.isStopWord(" ",2,4)); // a one-character string is shorter than minLen
27+
}
28+
29+
@Test
30+
public void testIsStopWordString() { // exercises the one-argument overload
31+
StopWords sw = new StopWords();
32+
sw.read("../models/stopwords/StopWords.txt"); // same relative path as in the other test
33+
assertTrue(!sw.isStopWord("现在我")); // expects the same word to be accepted by the one-argument overload
34+
}
35+
36+
}

fnlp-core/src/test/java/org/fnlp/nlp/resources/StopWordsTest.java

Lines changed: 0 additions & 66 deletions
This file was deleted.

fnlp-core/src/test/java/org/fnlp/util/MyFilesTest.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,5 @@ public static void setUpBeforeClass() throws Exception {
2020
public static void tearDownAfterClass() throws Exception {
2121
}
2222

23-
@Test
24-
public void testCombineStringStringArray() {
25-
fail("Not yet implemented");
26-
}
27-
28-
@Test
29-
public void testCombineStringFileArray() throws Exception {
30-
List<File> files = MyFiles.getAllFiles("../tmp/", ".cws");
31-
MyFiles.combine("../tmp/all.cws",files.toArray(new File[files.size()]));
32-
System.out.println(new Date().toString());
33-
System.out.println("Done!");
34-
}
3523

3624
}

fnlp-demo/src/main/java/org/fnlp/demo/nlp/tc/TextClassificationTest.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
package org.fnlp.demo.nlp.tc;
22

3-
import org.fnlp.data.reader.DocumentReader;
4-
import org.fnlp.data.reader.FileReader;
53
import org.fnlp.data.reader.Reader;
6-
import org.fnlp.ml.classifier.Predict;
74
import org.fnlp.ml.classifier.LabelParser.Type;
5+
import org.fnlp.ml.classifier.Predict;
86
import org.fnlp.ml.classifier.bayes.BayesClassifier;
97
import org.fnlp.ml.classifier.bayes.BayesTrainer;
108
import org.fnlp.ml.classifier.knn.KNNClassifier;

fnlp-train/src/main/java/org/fnlp/nlp/cn/rl/RLSeg.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ public class RLSeg {
5757
BufferedWriter bwNew;
5858
BufferedWriter bwNo;
5959
float prop = 0.5f;
60-
private String newdictfile = "./tmp/dict-new.txt";
61-
private String nodictfile = "./tmp/dict-no.txt";
60+
private String newdictfile = "../tmp/dict-new.txt";
61+
private String nodictfile = "../tmp/dict-no.txt";
6262

6363
public RLSeg(CWSTagger tag, String string) throws IOException{
6464
this.tag = tag;
@@ -88,7 +88,7 @@ public static void main(String[] args) throws Exception {
8888
tag.setDictionary(tempdict);
8989

9090
System.out.println("\n处理文件:");
91-
String s4 = tag.tagFile("./example-data/data-tag.txt");
91+
String s4 = tag.tagFile("../example-data/data-tag.txt");
9292
System.out.println(s4);
9393
String[] toks = s4.split("\\s+");
9494
int newset = rlseg.update(toks);
@@ -97,7 +97,7 @@ public static void main(String[] args) throws Exception {
9797
tag.setDictionary(tempdict);
9898

9999
System.out.println("\n处理文件:");
100-
String s = tag.tagFile("./example-data/data-tag.txt");
100+
String s = tag.tagFile("../example-data/data-tag.txt");
101101
System.out.println(s);
102102

103103

0 commit comments

Comments
 (0)