Skip to content
This repository was archived by the owner on Feb 8, 2024. It is now read-only.

Commit c20bca3

Browse files
CipherCipher
authored and committed
Added stemming function
1 parent 6dee9b2 commit c20bca3

File tree

3 files changed

+63
-13
lines changed

3 files changed

+63
-13
lines changed

Janex.py

Lines changed: 59 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,23 @@ def patterncompare(input_string, intents_file_path):
3535
Similarity = 0
3636
pattern = pattern.lower()
3737
WordList = Tokenize(pattern)
38-
Similarity = len(set(BagOfWords) & set(WordList)) / len(set(BagOfWords + WordList))
38+
NewList = []
39+
NewBag = []
40+
41+
for word in WordList:
42+
word = stem(word)
43+
NewList.append(word)
44+
45+
for word in BagOfWords:
46+
word = stem(word)
47+
NewBag.append(word)
48+
49+
WordList = NewList
50+
BagOfWords = NewBag
51+
52+
for word in BagOfWords:
53+
if word in WordList:
54+
Similarity = (Similarity+1/len(WordList + BagOfWords))
3955

4056
if Similarity > MaxSimilarity:
4157
SimilarityPercentage = Similarity * 100
@@ -69,30 +85,61 @@ def responsecompare(input_string, intents_file_path, intent_class):
6985
raise NoMatchingIntentError("No matching intent class found.")
7086

7187
for response in responses:
88+
7289
Similarity = 0
73-
response = response.lower()
90+
pattern = response.lower()
7491
WordList = Tokenize(response)
92+
NewList = []
93+
NewBag = []
7594

76-
for InputWord in BagOfWords:
77-
for OutputWord in WordList:
78-
if InputWord == OutputWord:
79-
Similarity += 1
80-
# print("Match found!")
95+
for word in WordList:
96+
word = stem(word)
97+
NewList.append(word)
8198

82-
OutofHundred = len(BagOfWords) # Total number of words in the input
83-
Hundred = len(BagOfWords + WordList) # Total number of words in both input and pattern
99+
for word in BagOfWords:
100+
word = stem(word)
101+
NewBag.append(word)
102+
103+
WordList = NewList
104+
BagOfWords = NewBag
105+
106+
for word in BagOfWords:
107+
if word in WordList:
108+
Similarity = (Similarity+1/len(WordList + BagOfWords))
84109

85110
if Similarity > MaxSimilarity:
86-
SimilarityPercentage = (Similarity / Hundred) * 100
111+
SimilarityPercentage = Similarity * 100
87112
MaxSimilarity = Similarity
88113
MostSimilarResponse = response
89114

90115
print(f"Similarity: {SimilarityPercentage:.2f}%")
91116

92117
# Convert MSR back into original string
93118
for response in responses:
119+
lowresponselist = []
94120
lowresponse = response.lower()
95-
if lowresponse == MostSimilarResponse:
96-
MostSimilarResponse = response
121+
lowresponselist = stem_sentence(lowresponse)
122+
123+
for lowresponse in lowresponselist:
124+
if lowresponse == MostSimilarResponse:
125+
MostSImilarResponse = response
97126

98127
return MostSimilarResponse
128+
129+
def stem(input_word):
    """Strip one common English suffix from a word.

    The suffixes are tried in a fixed order and only the first one that
    matches is removed, so ``'es'`` takes priority over ``'s'`` and ``'ly'``
    over ``'y'``. This is a crude heuristic rather than a linguistic stemmer:
    the result is often not a dictionary word (``"running"`` -> ``"runn"``).

    Args:
        input_word: The word to stem.

    Returns:
        The word without its matched suffix, or the word unchanged when no
        suffix matches or when the word consists of nothing but that suffix.
    """
    suffixes = ('ing', 'ly', 'ed', 'es', 's', 'er', 'est', 'y')
    for suffix in suffixes:
        if input_word.endswith(suffix):
            # Never reduce a word to "": an empty stem carries no information
            # and would compare equal to every other empty stem.
            return input_word[:-len(suffix)] or input_word
    return input_word
136+
137+
def stem_sentence(input_string):
    """Split a string on whitespace and stem every resulting word.

    Args:
        input_string: The text to process.

    Returns:
        A list holding the result of ``stem`` for each whitespace-separated
        token, in the order the tokens appear in ``input_string``.
    """
    return [stem(token) for token in input_string.split()]

__pycache__/Janex.cpython-311.pyc

1.31 KB
Binary file not shown.

chat.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,4 +10,7 @@
1010

1111
BestResponse = responsecompare(input_string, intents_file_path, intent_class)
1212

13-
print(BestResponse)
13+
print(BestResponse)
14+
15+
stemmed_words = stem_sentence(input_string)
16+
# print(stemmed_words)

0 commit comments

Comments
 (0)