 import streamlit as st
 import pickle
 import string
-from nltk.corpus import stopwords
 import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
 from nltk.stem.porter import PorterStemmer

-ps = PorterStemmer()
+# Ensure NLTK resources are downloaded
+nltk.download('punkt')
+nltk.download('stopwords')

+ps = PorterStemmer()

 def transform_text(text):
     text = text.lower()
-    text = nltk.word_tokenize(text)
+    text = word_tokenize(text)  # Tokenize text

-    y = []
-    for i in text:
-        if i.isalnum():
-            y.append(i)
+    # Remove non-alphanumeric tokens and punctuation, then remove stopwords
+    text = [i for i in text if i.isalnum()]
+    text = [i for i in text if i not in stopwords.words('english')]
+    text = [i for i in text if i not in string.punctuation]

-    text = y[:]
-    y.clear()
+    # Apply stemming
+    text = [ps.stem(i) for i in text]

-    for i in text:
-        if i not in stopwords.words('english') and i not in string.punctuation:
-            y.append(i)
+    return " ".join(text)

-    text = y[:]
-    y.clear()
-
-    for i in text:
-        y.append(ps.stem(i))
-
-    return " ".join(y)
-
-tfidf = pickle.load(open('vectorizer.pkl','rb'))
-model = pickle.load(open('model.pkl','rb'))
+# Load pre-trained models
+tfidf = pickle.load(open('vectorizer.pkl', 'rb'))
+model = pickle.load(open('model.pkl', 'rb'))

+# Streamlit app
 st.title("Email/SMS Spam Classifier")

 input_sms = st.text_area("Enter the message")

 if st.button('Predict'):
-
-    # 1. preprocess
+    # 1. Preprocess the input
     transformed_sms = transform_text(input_sms)
-    # 2. vectorize
+    # 2. Vectorize the input
     vector_input = tfidf.transform([transformed_sms])
-    # 3. predict
+    # 3. Predict
     result = model.predict(vector_input)[0]
-    # 4. Display
+    # 4. Display the result
     if result == 1:
         st.header("Spam")
     else:
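For context, a minimal usage sketch of the refactored `transform_text` follows. The sample message and the indicated output are illustrative assumptions, not part of this commit; the snippet presumes the function is defined as in the new version of the file and that the NLTK `punkt` and `stopwords` data have been downloaded.

```python
# Illustrative sketch only (not from the repo): exercises the new
# transform_text on a made-up message.
sample = "Congratulations! You have won a free ticket, call now!!!"
print(transform_text(sample))
# Roughly: "congratul won free ticket call"
# (lowercased, punctuation and stopwords dropped, tokens Porter-stemmed)
```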