Skip to content

Commit 95799a7

Browse files
chore: Update feature engineering and predictive modeling in app.py
1 parent 8544b9d commit 95799a7

File tree

1 file changed

+82
-11
lines changed

1 file changed

+82
-11
lines changed

app.py

Lines changed: 82 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,14 @@
55
import matplotlib.pyplot as plt
66
import seaborn as sns
77
import plotly.express as px
8+
from sklearn.model_selection import train_test_split
9+
from sklearn.preprocessing import StandardScaler, LabelEncoder
10+
from sklearn.linear_model import LinearRegression
11+
from sklearn.tree import DecisionTreeRegressor
12+
from sklearn.ensemble import RandomForestRegressor
13+
from sklearn.metrics import mean_squared_error, r2_score
14+
import plotly.graph_objs as go
15+
816

917
# Define the tasks for each level
1018
tasks = {
@@ -223,33 +231,33 @@ def main():
223231

224232
if selected_task == 'Task 3' and selected_level == 'Level 2':
225233
st.markdown("### Task 3: Feature Engineering")
226-
df = pd.read_csv("./data/data.csv")
234+
df2 = pd.read_csv("./data/data.csv")
227235
st.write('---')
228236

229237
st.markdown("- Extract additional features from the existing columns, such as the length of the restaurant name or address.")
230-
df['Restaurant Name Length'] = df['Restaurant Name'].apply(len)
231-
df['Address Length'] = df['Address'].apply(len)
232-
st.write('Extracted Features:\n', df[['Restaurant Name Length', 'Address Length']])
238+
df['Restaurant Name Length'] = df2['Restaurant Name'].apply(len)
239+
df['Address Length'] = df2['Address'].apply(len)
240+
st.write('Extracted Features:\n', df2[['Restaurant Name Length', 'Address Length']])
233241

234242
st.write('---')
235243

236244
st.markdown("- Create new features like `Has Table Booking` or `Has Online Delivery` by encoding categorical variables.")
237-
df['Has Table Booking'] = df['Has Table booking'].apply(lambda x: 1 if x == 'Yes' else 0)
238-
df['Has Online Delivery'] = df['Has Online delivery'].apply(lambda x: 1 if x == 'Yes' else 0)
245+
df2['Has Table Booking'] = df2['Has Table booking'].apply(lambda x: 1 if x == 'Yes' else 0)
246+
df2['Has Online Delivery'] = df2['Has Online delivery'].apply(lambda x: 1 if x == 'Yes' else 0)
239247

240-
st.write('New Features:\n', df.head())
248+
st.write('New Features:\n', df2.head())
241249

242250
st.markdown("- Analyse the distribution of the newly created features.")
243251
plt.figure(figsize=(10, 6))
244-
sns.histplot(df['Restaurant Name Length'], bins=20, kde=True)
252+
sns.histplot(df2['Restaurant Name Length'], bins=20, kde=True)
245253
plt.title('Distribution of Restaurant Name Length')
246254
plt.xlabel('Restaurant Name Length')
247255
plt.ylabel('Frequency')
248256
st.pyplot(plt)
249257
st.write('---')
250258

251259
plt.figure(figsize=(10, 6))
252-
sns.histplot(df['Address Length'], bins=20, kde=True)
260+
sns.histplot(df2['Address Length'], bins=20, kde=True)
253261
plt.title('Distribution of Address Length')
254262
plt.xlabel('Address Length')
255263
plt.ylabel('Frequency')
@@ -258,7 +266,7 @@ def main():
258266

259267
st.markdown("- Analyse the count of Table Booking.")
260268
plt.figure(figsize=(10, 6))
261-
sns.countplot(x='Has Table Booking', data=df)
269+
sns.countplot(x='Has Table Booking', data=df2)
262270
plt.title('Count of Restaurants with/without Table Booking')
263271
plt.xlabel('Has Table Booking')
264272
plt.ylabel('Count')
@@ -268,13 +276,76 @@ def main():
268276

269277
st.markdown("- Analyse the count of Online Delivery.")
270278
plt.figure(figsize=(10, 6))
271-
sns.countplot(x='Has Online Delivery', data=df)
279+
sns.countplot(x='Has Online Delivery', data=df2)
272280
plt.title('Count of Restaurants with/without Online Delivery')
273281
plt.xlabel('Has Online Delivery')
274282
plt.ylabel('Count')
275283
st.pyplot(plt)
276284
st.write('---')
277285

286+
if selected_task == 'Task 1' and selected_level == 'Level 3':
287+
df = pd.read_csv("./data/data.csv")
288+
289+
st.markdown("### Task 1: Predictive Modelling")
290+
291+
st.markdown(" **Preprocess the data**: Handle categorical variables and normalize the data.")
292+
293+
le = LabelEncoder()
294+
df['Cuisines'] = le.fit_transform(df['Cuisines'])
295+
df['City'] = le.fit_transform(df['City'])
296+
df['Country Code'] = le.fit_transform(df['Country Code'])
297+
df['Rating color'] = le.fit_transform(df['Rating color'])
298+
df['Has Table booking'] = df['Has Table booking'].apply(lambda x: 1 if x == 'Yes' else 0)
299+
df['Has Online delivery'] = df['Has Online delivery'].apply(lambda x: 1 if x == 'Yes' else 0)
300+
st.write('Data after preprocessing:\n', df.head())
301+
302+
303+
# Select features and target variable
304+
features = ['Country Code', 'City', 'Cuisines', 'Price range', 'Has Table booking', 'Has Online delivery']
305+
X = df[features]
306+
y = df['Aggregate rating']
307+
308+
st.markdown("- **Split the data**: Split the dataset into training and testing sets.")
309+
310+
# Splitting the data into Train & Test datasets
311+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
312+
313+
st.write('---')
314+
315+
316+
# Standardize the features
317+
scaler = StandardScaler()
318+
X_train = scaler.fit_transform(X_train)
319+
X_test = scaler.transform(X_test)
320+
321+
st.markdown("- **Build and evaluate models**: Train and evaluate different regression models.")
322+
# Train and evaluate different regression models
323+
models = {
324+
'Linear Regression': LinearRegression(),
325+
'Decision Tree': DecisionTreeRegressor(random_state=42),
326+
'Random Forest': RandomForestRegressor(random_state=42)
327+
}
328+
st.write('---')
329+
# Displaying the models
330+
st.write("The models used are:-\n",models)
331+
332+
results = {}
333+
for name, model in models.items():
334+
model.fit(X_train, y_train)
335+
y_pred = model.predict(X_test)
336+
mse = mean_squared_error(y_test, y_pred)
337+
r2 = r2_score(y_test, y_pred)
338+
results[name] = {'MSE': mse, 'R2': r2}
339+
340+
st.write('---')
341+
# Display results
342+
results_df = pd.DataFrame(results).T
343+
st.write("After predication the scores are:-\n",results_df)
344+
st.write('---')
345+
346+
st.markdown("- **Split the data**: Split the dataset into training and testing sets.")
347+
st.write('---')
348+
278349

279350
if __name__ == '__main__':
280351
main()

0 commit comments

Comments
 (0)