@@ -83,7 +83,10 @@ public SeqEval() {
83
83
STRLINE = "\\ \\ \\ hline\n " ;
84
84
sep = "&" ;
85
85
}
86
- }
86
+ }
87
+
88
+
89
+
87
90
88
91
89
92
/**
@@ -369,7 +372,9 @@ private String calcByOOVRate() {
369
372
370
373
371
374
return toString ("OOVRate" ,mpc , mp , mc , oov );
372
- }
375
+ }
376
+
377
+
373
378
374
379
private String toString (String mark , Map mpc ,
375
380
Map mp , Map mc , Map oov ) {
@@ -512,7 +517,65 @@ public String calcByType() {
512
517
513
518
return toString ("Type" ,mpc , mp , mc ,oov );
514
519
}
515
-
520
+ /**
521
+ * 计算Precision, Recall, F1值
522
+ * @return
523
+ * 下午5:24:53
524
+ */
525
+ public double [] calcPRF () {
526
+
527
+ /**
528
+ * 估计的中正确的,key是字符串长度,value是这种长度的个数
529
+ */
530
+ Map <String ,Double > mpc = new TreeMap <String ,Double >();
531
+ /**
532
+ * 估计的,key是字符串长度,value是这种长度的个数
533
+ */
534
+ Map <String ,Double > mp = new TreeMap <String ,Double >();
535
+ /**
536
+ * 正确的,key是字符串长度,value是这种长度的个数
537
+ */
538
+ Map <String ,Double > mc = new TreeMap <String ,Double >();
539
+
540
+ /**
541
+ * OOV
542
+ */
543
+ Map <String ,Double > oov = new TreeMap <String ,Double >();
544
+
545
+ for (int i =0 ;i <entityCs .size ();i ++){
546
+ LinkedList <Entity > cList = entityCs .get (i );
547
+ LinkedList <Entity > pList = entityPs .get (i );
548
+ LinkedList <Entity > cpList = entityCinPs .get (i );
549
+
550
+ for (Entity entity :cList ){
551
+
552
+ adjust (mc , "" , 1.0 );
553
+ if (dict !=null &&dict .size ()>0 ){
554
+ String s = entity .getEntityStr ();
555
+ if (!dict .contains (s )){
556
+ adjust (oov , "" , 1.0 );
557
+ }
558
+ }
559
+ }
560
+
561
+ for (Entity entity :pList ){
562
+ adjust (mp , "" , 1.0 );
563
+ }
564
+
565
+ for (Entity entity :cpList ){
566
+ adjust (mpc , "" , 1.0 );
567
+ }
568
+ }
569
+
570
+ String key = "" ;
571
+ double pre = (Double ) mpc .get (key )/(Double ) mp .get (key )*100 ;
572
+ double recall = (Double )mpc .get (key )/(Double )mc .get (key )*100 ;
573
+ double FB1 = (pre *recall *2 )/(recall +pre )*100 ;
574
+
575
+ return new double []{pre ,recall ,FB1 };
576
+
577
+ }
578
+
516
579
public String calcByType2 () {
517
580
518
581
/**
@@ -602,7 +665,8 @@ public void read(String filePath) throws IOException{
602
665
603
666
//从文件中提取实体并存入队列中
604
667
605
- while ((line = reader .readLine ()) != null ) {
668
+ while ((line = reader .readLine ()) != null ) {
669
+ line = line .trim ();
606
670
if (line .equals ("" )){
607
671
608
672
newextract (words , markP , typeP , markC , typeC );
@@ -612,13 +676,16 @@ public void read(String filePath) throws IOException{
612
676
//判断实体,实体开始的边界为B-***或者S-***,结束的边界为E-***或N(O)或空白字符或B-***
613
677
//predict
614
678
String [] toks = line .split ("\\ s+" );
615
-
679
+
616
680
int ci = 1 ;
617
681
int cj =2 ;
618
-
682
+
619
683
if (toks .length >3 ){//如果列数大于三,默认取最后两列
620
684
ci =toks .length -2 ;
621
685
cj =toks .length -1 ;
686
+ }else if (toks .length <3 ){
687
+ System .out .println (line );
688
+ return ;
622
689
}
623
690
624
691
String [] marktype = getMarkType (toks [ci ]);
@@ -691,7 +758,7 @@ private LinkedList<Entity> extract(ArrayList<String> words, ArrayList<String> ma
691
758
692
759
private boolean isStart (ArrayList <String > marks , ArrayList <String > types ,
693
760
int i ) {
694
-
761
+
695
762
if (NoSegLabel )
696
763
return true ;
697
764
@@ -766,8 +833,8 @@ else if(!curType.equalsIgnoreCase(nextType))
766
833
767
834
return end ;
768
835
}
769
-
770
-
836
+
837
+
771
838
772
839
/**
773
840
* 得到标记类型,BMES-后面的标记
@@ -776,13 +843,13 @@ else if(!curType.equalsIgnoreCase(nextType))
776
843
*/
777
844
private String [] getMarkType (String label ) {
778
845
String [] types = new String [2 ];
779
-
846
+
780
847
if (NoSegLabel ){
781
848
types [0 ] = "" ;
782
849
types [1 ] = label ;
783
850
return types ;
784
851
}
785
-
852
+
786
853
int idx = label .indexOf ('-' );
787
854
if (idx !=-1 ){
788
855
types [0 ] = label .substring (0 ,idx );
@@ -881,29 +948,7 @@ public static void main(String[] args) throws IOException{
881
948
ne1 .getRightOOV ("./paperdata/ctb6-seg/right-pattern.txt" );
882
949
ne1 .NeEvl (null );
883
950
884
- // ne1 = new NESatistic();
885
- // ne1.readOOV("./paperdata/exp-data/msr_training_words.utf8");
886
- // ne1.read("./paperdata/exp-data/msr_三列式结果_0.txt");
887
- // ne1.getWrongOOV("./paperdata/exp-data/msr_OOV-Wrong.txt");
888
- // ne1.NeEvl(null);
889
- //
890
- // ne1 = new NESatistic();
891
- // ne1.readOOV("./paperdata/exp-data/as_training_words.utf8");
892
- // ne1.read("./paperdata/exp-data/as_三列式结果_0.txt");
893
- // ne1.getWrongOOV("./paperdata/exp-data/as_OOV-Wrong.txt");
894
- // ne1.NeEvl(null);
895
- //
896
- // ne1 = new NESatistic();
897
- // ne1.readOOV("./paperdata/exp-data/pku_training_words.utf8");
898
- // ne1.read("./paperdata/exp-data/pku_三列式结果_0.txt");
899
- // ne1.getWrongOOV("./paperdata/exp-data/pku_OOV-Wrong.txt");
900
- // ne1.NeEvl(null);
901
- //
902
- // ne1 = new NESatistic();
903
- // ne1.readOOV("./paperdata/exp-data/cityu_training_words.utf8");
904
- // ne1.read("./paperdata/exp-data/cityu_三列式结果_0.txt");
905
- // ne1.getWrongOOV("./paperdata/exp-data/cityu_OOV-Wrong.txt");
906
- // ne1.NeEvl(null);
951
+
907
952
908
953
}
909
954
@@ -949,7 +994,7 @@ public void getWrongOOV(String string) {
949
994
}
950
995
951
996
952
- private void getRightOOV (String string ) {
997
+ void getRightOOV (String string ) {
953
998
954
999
if (dict ==null ||dict .size ()==0 )
955
1000
return ;
0 commit comments