1
+ #!/usr/bin/env python
1
2
2
3
from config import *
3
4
from Resources import *
@@ -19,7 +20,7 @@ def __init__(self):
19
20
Append tokens with similarity score(0.9)
20
21
'''
21
22
22
-
23
+
23
24
def load_paraphraseDatabase (self , FileName = 'Resources/ppdb-1.0-xxxl-lexical.extended.synonyms.uniquepairs' ):
24
25
25
26
file = open (FileName ,'r' )
@@ -33,6 +34,7 @@ def load_paraphraseDatabase(self, FileName = 'Resources/ppdb-1.0-xxxl-lexical.ex
33
34
tokens [1 ] = tokens [1 ].strip ()
34
35
self .ppdbDict [(tokens [0 ], tokens [1 ])] = self .ppdbSim
35
36
count += 1
37
+ # print count
36
38
37
39
38
40
'''
@@ -43,12 +45,11 @@ def load_paraphraseDatabase(self, FileName = 'Resources/ppdb-1.0-xxxl-lexical.ex
43
45
44
46
def checkWordPresentInDataBase (self , word1 , word2 ):
45
47
46
- if (word1 .lower (), word2 .lower ()) in ppdbDict :
47
48
49
+ if (word1 .lower (), word2 .lower ()) in self .ppdbDict :
48
50
return True
49
51
50
- if (word1 .lower (), word2 .lower ()) in ppdbDict :
51
-
52
+ if (word2 .lower (), word1 .lower ()) in self .ppdbDict :
52
53
return True
53
54
54
55
@@ -64,6 +65,7 @@ def checkWordPresentInDataBase(self, word1, word2):
64
65
vii. If both the words are present in PPDB then return PPDBSim (similarity score, 0.9)
65
66
66
67
Returns: similarity score between two words
68
+
67
69
'''
68
70
69
71
@@ -82,11 +84,13 @@ def computeWordSimilarityScore(self, word1, pos1, word2, pos2):
82
84
modifiedWord2 = word2 .replace (',' ,'' )
83
85
else :
84
86
modifiedWord2 = word2
85
-
87
+
86
88
if modifiedWord1 .lower () == modifiedWord2 .lower ():
89
+ # print "words exactly equal "
87
90
return 1
88
91
89
92
if self .stemmer .stem (word1 ).lower () == self .stemmer .stem (word2 ).lower ():
93
+ # print "stemma exactly equal "
90
94
return 1
91
95
92
96
if modifiedWord1 .isdigit () and modifiedWord2 .isdigit () and modifiedWord1 != modifiedWord2 :
@@ -108,7 +112,7 @@ def computeWordSimilarityScore(self, word1, pos1, word2, pos2):
108
112
#check words in database
109
113
110
114
if self .checkWordPresentInDataBase (word1 .lower (), word2 .lower ()):
111
- return ppdbSim
115
+ return self . ppdbSim
112
116
113
117
else :
114
118
return 0
0 commit comments