Skip to content

Commit 9279d26

Browse files
committed
更新2.1版本
1 parent f3c6c64 commit 9279d26

File tree

25 files changed

+157
-165
lines changed

25 files changed

+157
-165
lines changed

fnlp-app/pom.xml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,11 @@
44
<parent>
55
<groupId>org.fnlp</groupId>
66
<artifactId>fnlp-all</artifactId>
7-
<version>2.0-SNAPSHOT</version>
7+
<version>2.1-SNAPSHOT</version>
88
</parent>
99
<groupId>org.fnlp</groupId>
1010
<artifactId>fnlp-app</artifactId>
11-
<version>2.0-SNAPSHOT</version>
11+
<version>2.1-SNAPSHOT</version>
1212
<name>fnlp-app</name>
1313
<url>http://maven.apache.org</url>
1414
<properties>
@@ -24,7 +24,7 @@
2424
<dependency>
2525
<groupId>org.fnlp</groupId>
2626
<artifactId>fnlp-core</artifactId>
27-
<version>2.0-SNAPSHOT</version>
27+
<version>2.1-SNAPSHOT</version>
2828
</dependency>
2929
<dependency>
3030
<groupId>org.apache.lucene</groupId>

fnlp-core/.classpath

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
2121
<attributes>
2222
<attribute name="maven.pomderived" value="true"/>
23-
<attribute name="org.eclipse.jst.component.nondependency" value=""/>
2423
</attributes>
2524
</classpathentry>
2625
<classpathentry kind="output" path="target/classes"/>

fnlp-core/.project

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,6 @@
55
<projects>
66
</projects>
77
<buildSpec>
8-
<buildCommand>
9-
<name>org.eclipse.wst.common.project.facet.core.builder</name>
10-
<arguments>
11-
</arguments>
12-
</buildCommand>
138
<buildCommand>
149
<name>org.eclipse.jdt.core.javabuilder</name>
1510
<arguments>
@@ -20,17 +15,9 @@
2015
<arguments>
2116
</arguments>
2217
</buildCommand>
23-
<buildCommand>
24-
<name>org.eclipse.wst.validation.validationbuilder</name>
25-
<arguments>
26-
</arguments>
27-
</buildCommand>
2818
</buildSpec>
2919
<natures>
30-
<nature>org.eclipse.jem.workbench.JavaEMFNature</nature>
31-
<nature>org.eclipse.wst.common.modulecore.ModuleCoreNature</nature>
3220
<nature>org.eclipse.m2e.core.maven2Nature</nature>
3321
<nature>org.eclipse.jdt.core.javanature</nature>
34-
<nature>org.eclipse.wst.common.project.facet.core.nature</nature>
3522
</natures>
3623
</projectDescription>

fnlp-core/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,12 @@
44
<parent>
55
<groupId>org.fnlp</groupId>
66
<artifactId>fnlp-all</artifactId>
7-
<version>2.0-SNAPSHOT</version>
7+
<version>2.1-SNAPSHOT</version>
88
<relativePath>../pom.xml</relativePath>
99
</parent>
1010
<groupId>org.fnlp</groupId>
1111
<artifactId>fnlp-core</artifactId>
12-
<version>2.0-SNAPSHOT</version>
12+
<version>2.1-SNAPSHOT</version>
1313
<name>fnlp-core</name>
1414
<url>https://github.com/xpqiu/fnlp/</url>
1515
<description>FNLP is developed for Chinese natural language processing (NLP), which also includes some machine learning algorithms and data sets to achieve the NLP tasks. FNLP is distributed under LGPL3.0.</description>

fnlp-core/src/main/java/org/fnlp/ml/classifier/AbstractClassifier.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
public abstract class AbstractClassifier implements Serializable{
3737

3838
private static final long serialVersionUID = -175929257288466023L;
39+
3940
protected AlphabetFactory factory;
4041

4142

fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/Linear.java

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,7 @@ public class Linear extends AbstractClassifier implements Serializable {
5151

5252
private static final long serialVersionUID = -2626247109469506636L;
5353

54-
protected Inferencer inferencer;
55-
56-
protected AlphabetFactory factory;
54+
protected Inferencer inferencer;
5755

5856
protected Pipe pipe;
5957

@@ -145,8 +143,6 @@ public Pipe getPipe() {
145143
return pipe;
146144
}
147145

148-
public AlphabetFactory getAlphabetFactory() {
149-
return factory;
150-
}
146+
151147

152148
}

fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/OnlineTrainer.java

Lines changed: 32 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
/**
4141
* 在线参数训练类,
4242
* 可能问题:收敛控制,参数c设置过小,可能会导致“假收敛”的情况 2012.8.6
43-
*
43+
* @author xpqiu
4444
*/
4545
public class OnlineTrainer extends AbstractTrainer {
4646

@@ -61,7 +61,6 @@ public class OnlineTrainer extends AbstractTrainer {
6161
public boolean interim = false;
6262

6363
public float c=0.1f;
64-
6564
public float threshold = 0.99f;
6665

6766
protected Linear classifier;
@@ -74,55 +73,57 @@ public class OnlineTrainer extends AbstractTrainer {
7473
protected float[] weights;
7574
AlphabetFactory af;
7675

77-
public OnlineTrainer(AlphabetFactory af, int iternum) {
78-
//默认特征生成器
79-
Generator gen = new SFGenerator();
80-
//默认推理器
81-
this.inferencer = new LinearMax(gen, af.getLabelSize());
82-
//默认损失函数
83-
this.loss = new ZeroOneLoss();
84-
//默认参数更新策略
85-
this.update = new LinearMaxPAUpdate(loss);
86-
this.iternum = iternum;
87-
this.c = 0.1f;
88-
this.af = af;
89-
weights = (float[]) inferencer.getWeights();
90-
if (weights == null) {
91-
weights = new float[af.getFeatureSize()];
92-
inferencer.setWeights(weights);
93-
}
94-
random = new Random(1l);
95-
}
76+
9677
/**
9778
* 构造函数
9879
* @param af 字典
9980
*/
10081
public OnlineTrainer(AlphabetFactory af) {
10182
this(af,50);
83+
}
84+
85+
public OnlineTrainer(AlphabetFactory af, int iternum) {
86+
//默认特征生成器
87+
Generator gen = new SFGenerator();
88+
//默认推理器
89+
this.inferencer = new LinearMax(gen, af.getLabelSize());
90+
//默认损失函数
91+
this.loss = new ZeroOneLoss();
92+
//默认参数更新策略
93+
this.update = new LinearMaxPAUpdate(loss);
94+
this.iternum = iternum;
95+
this.af = af;
96+
weights = (float[]) inferencer.getWeights();
97+
if (weights == null) {
98+
weights = new float[af.getFeatureSize()];
99+
inferencer.setWeights(weights);
100+
}
101+
random = new Random(1l);
102102
}
103103

104104
/**
105105
* 构造函数
106106
* @param inferencer 推理算法
107107
* @param update 参数更新方法
108108
* @param loss 损失计算方法
109-
* @param fsize 特征数量
109+
* @param af 特征标签词典
110110
* @param iternum 最大迭代次数
111111
* @param c 步长阈值
112112
*/
113113
public OnlineTrainer(Inferencer inferencer, Update update,
114-
Loss loss, int fsize, int iternum, float c) {
114+
Loss loss, AlphabetFactory af, int iternum, float c) {
115115
this.inferencer = inferencer;
116116
this.update = update;
117117
this.loss = loss;
118118
this.iternum = iternum;
119-
this.c = c;
119+
this.c = c;
120+
this.af =af;
120121
weights = (float[]) inferencer.getWeights();
121122
if (weights == null) {
122-
weights = new float[fsize];
123+
weights = new float[af.getFeatureSize()];
123124
inferencer.setWeights(weights);
124-
}else if(weights.length<fsize){
125-
weights = Arrays.copyOf(weights, fsize);
125+
}else if(weights.length<af.getFeatureSize()){
126+
weights = Arrays.copyOf(weights, af.getFeatureSize());
126127
inferencer.setWeights(weights);
127128
}
128129
random = new Random(1l);
@@ -133,12 +134,12 @@ public OnlineTrainer(Inferencer inferencer, Update update,
133134
* @param classifier 分类器
134135
* @param update 参数更新方法
135136
* @param loss 损失计算方法
136-
* @param fsize 特征数量
137+
* @param af 特征标签词典
137138
* @param iternum 最大迭代次数
138139
* @param c 步长阈值
139140
*/
140-
public OnlineTrainer(Linear classifier, Update update, Loss loss, int fsize, int iternum, float c) {
141-
this(classifier.getInferencer(), update, loss, fsize, iternum, c);
141+
public OnlineTrainer(Linear classifier, Update update, Loss loss, AlphabetFactory af, int iternum, float c) {
142+
this(classifier.getInferencer(), update, loss, af, iternum, c);
142143
}
143144

144145
/**
@@ -252,7 +253,7 @@ public Linear train(InstanceSet trainset, InstanceSet devset) {
252253
System.out.println();
253254

254255
if (interim) {
255-
Linear p = new Linear(inferencer, trainset.getAlphabetFactory());
256+
Linear p = new Linear(inferencer, af);
256257
try {
257258
p.saveTo("tmp.model");
258259
} catch (IOException e) {

fnlp-core/src/main/java/org/fnlp/nlp/cn/PartOfSpeech.java

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,8 @@ public enum PartOfSpeech {
2828
专有名,
2929
人名,
3030
地名,
31-
机构名,
31+
机构名,
32+
实体名,
3233
型号名,
3334
事件名,
3435
网址,
@@ -41,7 +42,7 @@ public enum PartOfSpeech {
4142

4243
代词,
4344
人称代词,
44-
指示代词,
45+
指示词,
4546
疑问代词,
4647

4748

@@ -58,8 +59,8 @@ public enum PartOfSpeech {
5859

5960

6061
动词,
61-
能愿动词,
62-
趋向动词,
62+
情态词,
63+
趋向词,
6364
被动词,
6465
把动词,
6566

@@ -104,7 +105,7 @@ public static PartOfSpeech[] valueOf(String[] pos) {
104105
static{
105106
Pronoun.add(代词);
106107
Pronoun.add(人称代词);
107-
Pronoun.add(指示代词);
108+
Pronoun.add(指示词);
108109
Pronoun.add(疑问代词);
109110
}
110111
/**
@@ -129,7 +130,8 @@ public boolean isMark() {
129130
entities.add(人名);
130131
entities.add(地名);
131132
entities.add(机构名);
132-
entities.add(专有名);
133+
entities.add(专有名);
134+
entities.add(实体名);
133135
}
134136

135137
/**
@@ -141,13 +143,18 @@ public boolean isEntiry() {
141143
return entities.contains(this);
142144
}
143145
/**
144-
* 判断词性是否为一个实体,包括:人名|地名|机构名|专有名。
146+
* 判断词性是否为一个实体,包括:人名|地名|机构名|专有名|实体名
145147
* @param pos
146148
* @return
147149
*/
148150
public static boolean isEntiry(String pos) {
149-
PartOfSpeech p = valueOf(pos);
150-
151+
PartOfSpeech p;
152+
try {
153+
p = valueOf(pos);
154+
} catch (Exception e) {
155+
System.err.println(pos+"不存在");
156+
return false;
157+
}
151158
return p.isEntiry();
152159
}
153160

fnlp-core/src/main/java/org/fnlp/nlp/cn/anaphora/EntitiesGetter.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ public class EntitiesGetter {
4949
NP.add(PartOfSpeech.限定词);
5050
NP.add(PartOfSpeech.名词);
5151
NP.add(PartOfSpeech.代词);
52-
NP.add(PartOfSpeech.指示代词);
52+
NP.add(PartOfSpeech.指示词);
5353
NP.add(PartOfSpeech.人称代词);
5454
NP.add(PartOfSpeech.疑问代词);
5555

@@ -218,7 +218,7 @@ else if(this.isMale(ey.getData())){
218218
NN.add(PartOfSpeech.机构名);
219219
NN.add(PartOfSpeech.代词);
220220
NN.add(PartOfSpeech.人称代词);
221-
NN.add(PartOfSpeech.指示代词);
221+
NN.add(PartOfSpeech.指示词);
222222
NN.add(PartOfSpeech.疑问代词);
223223
}
224224

fnlp-core/src/main/java/org/fnlp/nlp/cn/anaphora/train/ARClassifier.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,7 @@ public void train() throws Exception {
146146
ZeroOneLoss l = new ZeroOneLoss();
147147
Inferencer ms = new LinearMax(gen, factory.getLabelSize());
148148
Update update = new LinearMaxPAUpdate(l);
149-
OnlineTrainer trainer = new OnlineTrainer(ms, update,l, factory.getFeatureSize(), 50,0.005f);
149+
OnlineTrainer trainer = new OnlineTrainer(ms, update,l, factory, 50,0.005f);
150150
Linear pclassifier = trainer.train(instset,instset);
151151
pipe.removeTargetPipe();
152152
pclassifier.setPipe(pipe);

fnlp-core/src/main/java/org/fnlp/nlp/parser/dep/train/JointParerTrainer.java

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -195,10 +195,8 @@ private InstanceSet buildInstanceList(String file) throws IOException {
195195
public void train(String dataFile, int maxite, float c) throws IOException {
196196

197197
InstanceSet instset = buildInstanceList(dataFile);
198-
IFeatureAlphabet features = factory.DefaultFeatureAlphabet();
199198

200199
SFGenerator generator = new SFGenerator();
201-
int fsize = features.size();
202200

203201
LabelAlphabet la = factory.DefaultLabelAlphabet();
204202
int ysize = la.size();
@@ -207,7 +205,7 @@ public void train(String dataFile, int maxite, float c) throws IOException {
207205
ZeroOneLoss loss = new ZeroOneLoss();
208206
Update update = new LinearMaxPAUpdate(loss);
209207
OnlineTrainer trainer = new OnlineTrainer(solver, update, loss,
210-
fsize, maxite, c);
208+
factory, maxite, c);
211209
Linear models = trainer.train(instset, null);
212210
instset = null;
213211
solver = null;

fnlp-core/src/main/java/org/fnlp/nlp/parser/dep/train/ParserTrainer.java

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -191,11 +191,9 @@ public void train(String dataFile, int maxite, float c) throws IOException {
191191

192192
LabelAlphabet postagAlphabet = factory.buildLabelAlphabet("postag");
193193

194-
IFeatureAlphabet features = factory.DefaultFeatureAlphabet();
195194

196195
SFGenerator generator = new SFGenerator();
197196
Linear[] models = new Linear[postagAlphabet.size()];
198-
int fsize = features.size();
199197

200198
for (int i = 0; i < postagAlphabet.size(); i++) {
201199
String pos = postagAlphabet.lookupString(i);
@@ -208,7 +206,7 @@ public void train(String dataFile, int maxite, float c) throws IOException {
208206
ZeroOneLoss loss = new ZeroOneLoss();
209207
Update update = new LinearMaxPAUpdate(loss);
210208
OnlineTrainer trainer = new OnlineTrainer(solver, update, loss,
211-
fsize, maxite, c);
209+
factory, maxite, c);
212210
models[i] = trainer.train(instset, null);
213211
instset = null;
214212
solver = null;

fnlp-core/src/main/java/org/fnlp/nlp/tag/AbstractTagger.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -92,10 +92,10 @@ public void train() throws Exception {
9292
OnlineTrainer trainer;
9393

9494
if(cl!=null){
95-
trainer = new OnlineTrainer(cl, update, loss, features.size(),iterNum, c);
95+
trainer = new OnlineTrainer(cl, update, loss, factory,iterNum, c);
9696
}else{
9797
trainer = new OnlineTrainer(inference, update, loss,
98-
features.size(), iterNum, c);
98+
factory, iterNum, c);
9999
}
100100

101101
cl = trainer.train(trainSet, testSet);

0 commit comments

Comments
 (0)