This repository was archived by the owner on Feb 8, 2024. It is now read-only.

Commit d9dd7e3 (1 parent: 8d9ad88)
Author: Cipher
Commit message: "e"

20 files changed: +302 −302 lines changed

Janex.egg-info/PKG-INFO

Lines changed: 2 additions & 2 deletions

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: Janex
-Version: 0.0.6a0
+Version: 0.0.7
 Home-page: https://github.com/Cipher58
 Download-URL: https://github.com/Cipher58/Janex/
 Author: Cipher58
@@ -100,5 +100,5 @@ print(output)
 - Word tokenizer ✓
 - Intent classifier ✓
 - Word Stemmer ✓
-- Support for Darwin, Unix-like and Windows ✓
+- Support for Darwin, Linux (GNU) and Windows ✓
 - Simple text transformer ✓
Janex/__init__.py

Lines changed: 1 addition & 150 deletions

The 150 removed lines are the entire IntentMatcher class, added back verbatim as Janex/main.py below (see that diff for the full listing, and the usage sketch after it); __init__.py is reduced to a re-export:

@@ -1,150 +1 @@
-import json
-
-class IntentMatcher:
-    ... (full class body, identical to the 150-line addition in Janex/main.py below)
+from .main import *
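
For downstream code, the move is intended to be transparent: main.py defines no __all__, so the wildcard import re-exports every public name in that module (including its json import), and the old import path keeps working. A minimal sketch, assuming the package is installed under the name Janex given in PKG-INFO:

# Both paths resolve to the same class after this commit:
from Janex import IntentMatcher        # old path, now served via the re-export
from Janex.main import IntentMatcher   # new path, importing from main.py directly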

Janex/main.py

Lines changed: 149 additions & 0 deletions

@@ -1 +1,150 @@
+import json
 
+class IntentMatcher:
+    def __init__(self, intents_file_path):
+        self.intents_file_path = intents_file_path
+        self.intents = self.train()
+
+    def tokenize(self, input_string):
+        processed_string = input_string.lower().strip().replace(r"[^\w\s]|_", "").replace(r"\s+", " ")
+        words = processed_string.split(" ")
+
+        words = self.stem_list(words)
+
+        return words
+
+    def tokenize_list(self, input_list):
+        token_words = []
+        for word in input_list:
+            token = self.tokenize(word)
+            token_words.append(token)
+
+        return token_words
+
+    def train(self):
+        with open(self.intents_file_path, "r") as file:
+            intents = json.load(file)
+        return intents
+
+    def pattern_compare(self, input_string):
+        input_string_lower = input_string.lower()
+        highest_similarity = 0
+        most_similar_pattern = None
+        similarity_percentage = 0
+
+        for intent_class in self.intents["intents"]:
+            overall_word_list = []
+            similarity = 0
+
+            for pattern in intent_class["patterns"]:
+                word_list = []
+                pattern_lower = pattern.lower()
+                word_list = self.tokenize(pattern_lower)
+                overall_word_list.append(word_list)
+                new_list = []
+                new_bag = []
+
+                for word in word_list:
+                    word = self.stem(word)
+                    new_list.append(word)
+
+                word_list_2 = self.tokenize(input_string_lower)
+                for word in word_list_2:
+                    word = self.stem(word)
+                    new_bag.append(word)
+
+                word_list = new_list
+                word_list_2 = new_bag
+
+                for word in word_list_2:
+                    if word in word_list:
+                        similarity += 1
+
+                if similarity > highest_similarity:
+                    similarity_percentage = similarity / (len(overall_word_list) + len(word_list_2))
+                    highest_similarity = similarity
+                    most_similar_pattern = intent_class
+
+        print(f"Similarity: {similarity_percentage:.2%}")
+
+        if most_similar_pattern:
+            highest_similarity = highest_similarity / 100
+            return most_similar_pattern, highest_similarity
+        else:
+            raise ValueError("No matching intent class found.")
+
+    def response_compare(self, input_string, intent_class):
+        input_string_lower = input_string.lower()
+        highest_similarity = 0
+        similarity_percentage = 0
+        most_similar_response = None
+
+        responses = intent_class["responses"] if intent_class else []
+
+        for response in responses:
+            similarity = 0
+            response_lower = response.lower()
+            word_list = self.tokenize(response_lower)
+            new_list = []
+            new_bag = []
+
+            for word in word_list:
+                word = self.stem(word)
+                new_list.append(word)
+
+            word_list_2 = self.tokenize(input_string_lower)
+            for word in word_list_2:
+                word = self.stem(word)
+                new_bag.append(word)
+
+            word_list = new_list
+            word_list_2 = new_bag
+            overall_word_list = word_list + word_list_2
+
+            for word in word_list_2:
+                if word in word_list:
+                    similarity += 1
+
+            if similarity > highest_similarity:
+                similarity_percentage = similarity / (len(overall_word_list) + len(word_list_2))
+                highest_similarity = similarity
+                most_similar_response = response
+
+        print(f"Similarity: {similarity_percentage:.2%}")
+
+        # Convert most_similar_response back into the original string
+        for response in responses:
+            low_response_list = []
+            low_response = response.lower()
+            low_response_list = self.stem_sentence(low_response)
+
+            for low_response_word in low_response_list:
+                if low_response_word == most_similar_response:
+                    most_similar_response = response
+
+        return most_similar_response
+
+    def stem(self, input_word):
+        suffixes = ["ing", "ly", "ed", "es", "'s", "er", "est", "y", "ily", "able", "ful", "ness", "less", "ment", "ive", "ize", "ous"]
+        for suffix in suffixes:
+            if input_word.endswith(suffix):
+                input_word = input_word[:-len(suffix)]
+                break
+        return input_word
+
+    def stem_sentence(self, input_string):
+        word_list = input_string.split(" ")
+        stemmed_words = []
+        for input_word in word_list:
+            word = self.stem(input_word)
+            stemmed_words.append(word)
+
+        return stemmed_words
+
+    def stem_list(self, input_list):
+        stemmed_words = []
+        for word in input_list:
+            stemmed_word = self.stem(word)
+            stemmed_words.append(stemmed_word)
+
+        return stemmed_words
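
The diff above is the whole public API, so a usage sketch fits here. The file name intents.json, its contents, and the input string are illustrative assumptions; the "intents" → "patterns"/"responses" layout is the structure that pattern_compare() and response_compare() actually read:

# intents.json -- minimal illustration of the structure train() expects:
# {
#   "intents": [
#     {"tag": "greeting",
#      "patterns": ["Hello", "Hi there"],
#      "responses": ["Hello!", "Hi, how can I help?"]}
#   ]
# }

from Janex import IntentMatcher

matcher = IntentMatcher("intents.json")                      # train() loads and parses the JSON
intent_class, score = matcher.pattern_compare("Hi there!")   # best-matching intent class
reply = matcher.response_compare("Hi there!", intent_class)  # best-matching response string
print(reply)

Two quirks are worth knowing before building on this code. First, str.replace() treats its first argument as a literal string, so the r"[^\w\s]|_" and r"\s+" calls in tokenize() never strip anything; regex behaviour would need re.sub(), roughly as sketched below (an assumption about the intended behaviour, not what this commit ships):

import re

def clean(input_string):
    # Drop punctuation and underscores, then collapse runs of whitespace --
    # what tokenize() appears to intend with its regex-looking literals.
    no_punct = re.sub(r"[^\w\s]|_", "", input_string.lower().strip())
    return re.sub(r"\s+", " ", no_punct)

Second, pattern_compare() divides the raw overlap count by 100 before returning it, so its second return value is a small fraction of the match count rather than a true percentage.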
