Skip to content

Commit 8159aeb

Browse files
committed
seqeval
1 parent 8ed7c49 commit 8159aeb

File tree

2 files changed

+139
-35
lines changed

2 files changed

+139
-35
lines changed

fnlp-core/src/main/java/org/fnlp/ml/eval/SeqEval.java

Lines changed: 80 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,10 @@ public SeqEval() {
8383
STRLINE = "\\\\\\hline\n";
8484
sep = "&";
8585
}
86-
}
86+
}
87+
88+
89+
8790

8891

8992
/**
@@ -369,7 +372,9 @@ private String calcByOOVRate() {
369372

370373

371374
return toString("OOVRate",mpc, mp, mc, oov);
372-
}
375+
}
376+
377+
373378

374379
private String toString(String mark, Map mpc,
375380
Map mp, Map mc, Map oov) {
@@ -512,7 +517,65 @@ public String calcByType() {
512517

513518
return toString("Type",mpc, mp, mc,oov);
514519
}
515-
520+
/**
521+
* 计算Precision, Recall, F1值
522+
* @return
523+
* 下午5:24:53
524+
*/
525+
public double[] calcPRF() {
526+
527+
/**
528+
* 估计的中正确的,key是字符串长度,value是这种长度的个数
529+
*/
530+
Map<String,Double> mpc = new TreeMap<String,Double>();
531+
/**
532+
* 估计的,key是字符串长度,value是这种长度的个数
533+
*/
534+
Map<String,Double> mp = new TreeMap<String,Double>();
535+
/**
536+
* 正确的,key是字符串长度,value是这种长度的个数
537+
*/
538+
Map<String,Double> mc = new TreeMap<String,Double>();
539+
540+
/**
541+
* OOV
542+
*/
543+
Map<String,Double> oov = new TreeMap<String,Double>();
544+
545+
for(int i=0;i<entityCs.size();i++){
546+
LinkedList<Entity> cList = entityCs.get(i);
547+
LinkedList<Entity> pList = entityPs.get(i);
548+
LinkedList<Entity> cpList = entityCinPs.get(i);
549+
550+
for(Entity entity:cList){
551+
552+
adjust(mc, "", 1.0);
553+
if(dict!=null&&dict.size()>0){
554+
String s = entity.getEntityStr();
555+
if(!dict.contains(s)){
556+
adjust(oov, "", 1.0);
557+
}
558+
}
559+
}
560+
561+
for(Entity entity:pList){
562+
adjust(mp, "", 1.0);
563+
}
564+
565+
for(Entity entity:cpList){
566+
adjust(mpc, "", 1.0);
567+
}
568+
}
569+
570+
String key = "";
571+
double pre = (Double) mpc.get(key)/(Double) mp.get(key)*100;
572+
double recall = (Double)mpc.get(key)/(Double)mc.get(key)*100;
573+
double FB1 = (pre*recall*2)/(recall+pre)*100;
574+
575+
return new double[]{pre,recall,FB1};
576+
577+
}
578+
516579
public String calcByType2() {
517580

518581
/**
@@ -602,7 +665,8 @@ public void read(String filePath) throws IOException{
602665

603666
//从文件中提取实体并存入队列中
604667

605-
while ((line = reader.readLine()) != null) {
668+
while ((line = reader.readLine()) != null) {
669+
line = line.trim();
606670
if(line.equals("")){
607671

608672
newextract(words, markP, typeP, markC, typeC);
@@ -612,13 +676,16 @@ public void read(String filePath) throws IOException{
612676
//判断实体,实体开始的边界为B-***或者S-***,结束的边界为E-***或N(O)或空白字符或B-***
613677
//predict
614678
String[] toks = line.split("\\s+");
615-
679+
616680
int ci = 1;
617681
int cj=2;
618-
682+
619683
if(toks.length>3){//如果列数大于三,默认取最后两列
620684
ci=toks.length-2;
621685
cj=toks.length-1;
686+
}else if(toks.length<3){
687+
System.out.println(line);
688+
return;
622689
}
623690

624691
String[] marktype = getMarkType(toks[ci]);
@@ -691,7 +758,7 @@ private LinkedList<Entity> extract(ArrayList<String> words, ArrayList<String> ma
691758

692759
private boolean isStart(ArrayList<String> marks, ArrayList<String> types,
693760
int i) {
694-
761+
695762
if(NoSegLabel)
696763
return true;
697764

@@ -766,8 +833,8 @@ else if(!curType.equalsIgnoreCase(nextType))
766833

767834
return end;
768835
}
769-
770-
836+
837+
771838

772839
/**
773840
* 得到标记类型,BMES-后面的标记
@@ -776,13 +843,13 @@ else if(!curType.equalsIgnoreCase(nextType))
776843
*/
777844
private String[] getMarkType(String label) {
778845
String[] types = new String[2];
779-
846+
780847
if(NoSegLabel){
781848
types[0] = "";
782849
types[1] = label;
783850
return types;
784851
}
785-
852+
786853
int idx = label.indexOf('-');
787854
if(idx!=-1){
788855
types[0] = label.substring(0,idx);
@@ -881,29 +948,7 @@ public static void main(String[] args) throws IOException{
881948
ne1.getRightOOV("./paperdata/ctb6-seg/right-pattern.txt");
882949
ne1.NeEvl(null);
883950

884-
// ne1 = new NESatistic();
885-
// ne1.readOOV("./paperdata/exp-data/msr_training_words.utf8");
886-
// ne1.read("./paperdata/exp-data/msr_三列式结果_0.txt");
887-
// ne1.getWrongOOV("./paperdata/exp-data/msr_OOV-Wrong.txt");
888-
// ne1.NeEvl(null);
889-
//
890-
// ne1 = new NESatistic();
891-
// ne1.readOOV("./paperdata/exp-data/as_training_words.utf8");
892-
// ne1.read("./paperdata/exp-data/as_三列式结果_0.txt");
893-
// ne1.getWrongOOV("./paperdata/exp-data/as_OOV-Wrong.txt");
894-
// ne1.NeEvl(null);
895-
//
896-
// ne1 = new NESatistic();
897-
// ne1.readOOV("./paperdata/exp-data/pku_training_words.utf8");
898-
// ne1.read("./paperdata/exp-data/pku_三列式结果_0.txt");
899-
// ne1.getWrongOOV("./paperdata/exp-data/pku_OOV-Wrong.txt");
900-
// ne1.NeEvl(null);
901-
//
902-
// ne1 = new NESatistic();
903-
// ne1.readOOV("./paperdata/exp-data/cityu_training_words.utf8");
904-
// ne1.read("./paperdata/exp-data/cityu_三列式结果_0.txt");
905-
// ne1.getWrongOOV("./paperdata/exp-data/cityu_OOV-Wrong.txt");
906-
// ne1.NeEvl(null);
951+
907952

908953
}
909954

@@ -949,7 +994,7 @@ public void getWrongOOV(String string) {
949994
}
950995

951996

952-
private void getRightOOV(String string) {
997+
void getRightOOV(String string) {
953998

954999
if(dict==null||dict.size()==0)
9551000
return;
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/**
2+
*
3+
*/
4+
package org.fnlp.ml.eval;
5+
6+
import static org.junit.Assert.*;
7+
8+
import java.io.IOException;
9+
10+
import org.junit.AfterClass;
11+
import org.junit.BeforeClass;
12+
import org.junit.Test;
13+
14+
/**
15+
* @author Xipeng Qiu E-mail: xpqiu@fudan.edu.cn
16+
* @version 创建时间:2015年5月6日 下午4:54:05
17+
*/
18+
public class SeqEvalTest {
19+
20+
/**
21+
* @throws java.lang.Exception
22+
* 下午4:54:05
23+
*/
24+
@BeforeClass
25+
public static void setUpBeforeClass() throws Exception {
26+
}
27+
28+
/**
29+
* @throws java.lang.Exception
30+
* 下午4:54:05
31+
*/
32+
@AfterClass
33+
public static void tearDownAfterClass() throws Exception {
34+
}
35+
36+
@Test
37+
public void test() throws IOException {
38+
39+
40+
41+
String filePath = "./paperdata/ctb6-seg/work/ctb_三列式结果_0.txt";
42+
String dictpath = "D:\\项目\\9.评测\\NLPCC2015分词\\data21_No_0\\all.dict";
43+
// String filePath = "D:\\项目\\9.评测\\NLPCC2015分词\\data21_No_0\\testSeg.txt";
44+
// String dictpath = "D:\\项目\\9.评测\\NLPCC2015分词\\data21_No_0\\all.dict";
45+
46+
// filePath = "./example-data/sequence/seq.res";
47+
48+
//读取评测结果文件,并输出到outputPath
49+
SeqEval ne1;
50+
ne1 = new SeqEval();
51+
ne1.readOOV(dictpath);
52+
ne1.read(filePath);
53+
ne1.NeEvl(null);
54+
double[] res = ne1.calcPRF();
55+
System.out.print(res[0] +" " + res[1]+" " +res[2]);
56+
57+
}
58+
59+
}

0 commit comments

Comments
 (0)