Added stemming function

Cipher · Cipher · commit c20bca31a796 · 2023-07-04T00:12:17.000+01:00
diff --git a/Janex.py b/Janex.py
@@ -35,7 +35,23 @@ def patterncompare(input_string, intents_file_path):
             Similarity = 0
             pattern = pattern.lower()
             WordList = Tokenize(pattern)
-            Similarity = len(set(BagOfWords) & set(WordList)) / len(set(BagOfWords + WordList))
+            NewList = []
+            NewBag = []
+
+            for word in WordList:
+                word = stem(word)
+                NewList.append(word)
+
+            for word in BagOfWords:
+                word = stem(word)
+                NewBag.append(word)
+
+            WordList = NewList
+            BagOfWords = NewBag
+
+            for word in BagOfWords:
+                if word in WordList:
+                    Similarity = (Similarity+1/len(WordList + BagOfWords))
 
             if Similarity > MaxSimilarity:
                 SimilarityPercentage = Similarity * 100
@@ -69,30 +85,61 @@ def responsecompare(input_string, intents_file_path, intent_class):
         raise NoMatchingIntentError("No matching intent class found.")
 
     for response in responses:
+
         Similarity = 0
-        response = response.lower()
+        pattern = response.lower()
         WordList = Tokenize(response)
+        NewList = []
+        NewBag = []
 
-        for InputWord in BagOfWords:
-            for OutputWord in WordList:
-                if InputWord == OutputWord:
-                    Similarity += 1
-#                    print("Match found!")
+        for word in WordList:
+            word = stem(word)
+            NewList.append(word)
 
-        OutofHundred = len(BagOfWords)  # Total number of words in the input
-        Hundred = len(BagOfWords + WordList)  # Total number of words in both input and pattern
+        for word in BagOfWords:
+            word = stem(word)
+            NewBag.append(word)
+
+        WordList = NewList
+        BagOfWords = NewBag
+
+        for word in BagOfWords:
+            if word in WordList:
+                Similarity = (Similarity+1/len(WordList + BagOfWords))
 
         if Similarity > MaxSimilarity:
-            SimilarityPercentage = (Similarity / Hundred) * 100
+            SimilarityPercentage = Similarity * 100
             MaxSimilarity = Similarity
             MostSimilarResponse = response
 
     print(f"Similarity: {SimilarityPercentage:.2f}%")
 
     # Convert MSR back into original string
     for response in responses:
+        lowresponselist = []
         lowresponse = response.lower()
-        if lowresponse == MostSimilarResponse:
-            MostSimilarResponse = response
+        lowresponselist = stem_sentence(lowresponse)
+
+        for lowresponse in lowresponselist:
+            if lowresponse == MostSimilarResponse:
+                MostSImilarResponse = response
 
     return MostSimilarResponse
+
+def stem(input_word):
+    """Strip the first listed suffix input_word ends with; never return ''."""
+    # Order matters: only the first matching suffix is removed ('es' before 's').
+    for suffix in ('ing', 'ly', 'ed', 'es', 's', 'er', 'est', 'y'):
+        if input_word.endswith(suffix):
+            return input_word[:-len(suffix)] or input_word  # keep bare-suffix words intact
+    return input_word
+
+def stem_sentence(input_string):
+    """Split input_string on whitespace and return the list of stemmed words.
+
+    Each whitespace-separated token is passed through stem(); the result
+    keeps the tokens in the order they appear in input_string.
+    """
+    tokens = input_string.split()
+    stemmed = [stem(token) for token in tokens]
+    return stemmed
diff --git a/__pycache__/Janex.cpython-311.pyc b/__pycache__/Janex.cpython-311.pyc
diff --git a/chat.py b/chat.py
@@ -10,4 +10,7 @@
 
 BestResponse = responsecompare(input_string, intents_file_path, intent_class)
 
-print(BestResponse)
+print(BestResponse)
+
+stemmed_words = stem_sentence(input_string)
+# print(stemmed_words)