Skip to content

Commit 3ab5300

Browse files
Update app.py
1 parent dd75da1 commit 3ab5300

File tree

1 file changed

+22
-27
lines changed

1 file changed

+22
-27
lines changed

app.py

Lines changed: 22 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,53 +1,48 @@
11
import streamlit as st
22
import pickle
33
import string
4-
from nltk.corpus import stopwords
54
import nltk
5+
from nltk.corpus import stopwords
6+
from nltk.tokenize import word_tokenize
67
from nltk.stem.porter import PorterStemmer
78

8-
ps = PorterStemmer()
9+
# Ensure NLTK resources are downloaded
10+
nltk.download('punkt')
11+
nltk.download('stopwords')
912

13+
ps = PorterStemmer()
1014

1115
def transform_text(text):
    """Normalize a raw message into a space-joined string of stemmed tokens.

    The text is lowercased, tokenized with NLTK's ``word_tokenize``,
    reduced to alphanumeric tokens that are not English stopwords, and each
    surviving token is stemmed with the module-level Porter stemmer ``ps``.

    Args:
        text: The raw message string.

    Returns:
        The stemmed tokens joined by single spaces ("" if nothing survives).
    """
    # Build the stopword lookup once per call. Calling
    # stopwords.words('english') inside the filter re-read the list for
    # every token and did a linear scan each time.
    stop_words = set(stopwords.words('english'))

    tokens = word_tokenize(text.lower())

    # isalnum() already rejects every punctuation-only token (and the empty
    # string), so a separate string.punctuation filter would never remove
    # anything further.
    kept = [tok for tok in tokens if tok.isalnum() and tok not in stop_words]

    return " ".join(ps.stem(tok) for tok in kept)
34-
35-
# Load the pre-trained vectorizer and classifier from the working directory.
# The files are opened in `with` blocks so the handles are closed as soon as
# unpickling finishes instead of being left for the garbage collector.
# NOTE(security): pickle.load can execute arbitrary code while loading; only
# ship .pkl files that come from a trusted source.
with open('vectorizer.pkl', 'rb') as vectorizer_file:
    tfidf = pickle.load(vectorizer_file)
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
3732

33+
# Streamlit app
3834
st.title("Email/SMS Spam Classifier")
3935

4036
input_sms = st.text_area("Enter the message")
4137

4238
if st.button('Predict'):
43-
44-
# 1. preprocess
39+
# 1. Preprocess the input
4540
transformed_sms = transform_text(input_sms)
46-
# 2. vectorize
41+
# 2. Vectorize the input
4742
vector_input = tfidf.transform([transformed_sms])
48-
# 3. predict
43+
# 3. Predict
4944
result = model.predict(vector_input)[0]
50-
# 4. Display
45+
# 4. Display the result
5146
if result == 1:
5247
st.header("Spam")
5348
else:

0 commit comments

Comments
 (0)