5
5
import matplotlib .pyplot as plt
6
6
import seaborn as sns
7
7
import plotly .express as px
8
+ from sklearn .model_selection import train_test_split
9
+ from sklearn .preprocessing import StandardScaler , LabelEncoder
10
+ from sklearn .linear_model import LinearRegression
11
+ from sklearn .tree import DecisionTreeRegressor
12
+ from sklearn .ensemble import RandomForestRegressor
13
+ from sklearn .metrics import mean_squared_error , r2_score
14
+ import plotly .graph_objs as go
15
+
8
16
9
17
# Define the tasks for each level
10
18
tasks = {
@@ -223,33 +231,33 @@ def main():
223
231
224
232
if selected_task == 'Task 3' and selected_level == 'Level 2' :
225
233
st .markdown ("### Task 3: Feature Engineering" )
226
- df = pd .read_csv ("./data/data.csv" )
234
+ df2 = pd .read_csv ("./data/data.csv" )
227
235
st .write ('---' )
228
236
229
237
st .markdown ("- Extract additional features from the existing columns, such as the length of the restaurant name or address." )
230
- df ['Restaurant Name Length' ] = df ['Restaurant Name' ].apply (len )
231
- df ['Address Length' ] = df ['Address' ].apply (len )
232
- st .write ('Extracted Features:\n ' , df [['Restaurant Name Length' , 'Address Length' ]])
238
+ df2 ['Restaurant Name Length' ] = df2 ['Restaurant Name' ].apply (len )
239
+ df2 ['Address Length' ] = df2 ['Address' ].apply (len )
240
+ st .write ('Extracted Features:\n ' , df2 [['Restaurant Name Length' , 'Address Length' ]])
233
241
234
242
st .write ('---' )
235
243
236
244
st .markdown ("- Create new features like `Has Table Booking` or `Has Online Delivery` by encoding categorical variables." )
237
- df ['Has Table Booking' ] = df ['Has Table booking' ].apply (lambda x : 1 if x == 'Yes' else 0 )
238
- df ['Has Online Delivery' ] = df ['Has Online delivery' ].apply (lambda x : 1 if x == 'Yes' else 0 )
245
+ df2 ['Has Table Booking' ] = df2 ['Has Table booking' ].apply (lambda x : 1 if x == 'Yes' else 0 )
246
+ df2 ['Has Online Delivery' ] = df2 ['Has Online delivery' ].apply (lambda x : 1 if x == 'Yes' else 0 )
239
247
240
- st .write ('New Features:\n ' , df .head ())
248
+ st .write ('New Features:\n ' , df2 .head ())
241
249
242
250
st .markdown ("- Analyse the distribution of the newly created features." )
243
251
plt .figure (figsize = (10 , 6 ))
244
- sns .histplot (df ['Restaurant Name Length' ], bins = 20 , kde = True )
252
+ sns .histplot (df2 ['Restaurant Name Length' ], bins = 20 , kde = True )
245
253
plt .title ('Distribution of Restaurant Name Length' )
246
254
plt .xlabel ('Restaurant Name Length' )
247
255
plt .ylabel ('Frequency' )
248
256
st .pyplot (plt )
249
257
st .write ('---' )
250
258
251
259
plt .figure (figsize = (10 , 6 ))
252
- sns .histplot (df ['Address Length' ], bins = 20 , kde = True )
260
+ sns .histplot (df2 ['Address Length' ], bins = 20 , kde = True )
253
261
plt .title ('Distribution of Address Length' )
254
262
plt .xlabel ('Address Length' )
255
263
plt .ylabel ('Frequency' )
@@ -258,7 +266,7 @@ def main():
258
266
259
267
st .markdown ("- Analyse the count of Table Booking." )
260
268
plt .figure (figsize = (10 , 6 ))
261
- sns .countplot (x = 'Has Table Booking' , data = df )
269
+ sns .countplot (x = 'Has Table Booking' , data = df2 )
262
270
plt .title ('Count of Restaurants with/without Table Booking' )
263
271
plt .xlabel ('Has Table Booking' )
264
272
plt .ylabel ('Count' )
@@ -268,13 +276,76 @@ def main():
268
276
269
277
st .markdown ("- Analyse the count of Online Delivery." )
270
278
plt .figure (figsize = (10 , 6 ))
271
- sns .countplot (x = 'Has Online Delivery' , data = df )
279
+ sns .countplot (x = 'Has Online Delivery' , data = df2 )
272
280
plt .title ('Count of Restaurants with/without Online Delivery' )
273
281
plt .xlabel ('Has Online Delivery' )
274
282
plt .ylabel ('Count' )
275
283
st .pyplot (plt )
276
284
st .write ('---' )
277
285
286
+ if selected_task == 'Task 1' and selected_level == 'Level 3' :
287
+ df = pd .read_csv ("./data/data.csv" )
288
+
289
+ st .markdown ("### Task 1: Predictive Modelling" )
290
+
291
+ st .markdown ("- **Preprocess the data**: Handle categorical variables and normalize the data." )
292
+
293
+ le = LabelEncoder ()
294
+ df ['Cuisines' ] = le .fit_transform (df ['Cuisines' ])
295
+ df ['City' ] = le .fit_transform (df ['City' ])
296
+ df ['Country Code' ] = le .fit_transform (df ['Country Code' ])
297
+ df ['Rating color' ] = le .fit_transform (df ['Rating color' ])
298
+ df ['Has Table booking' ] = df ['Has Table booking' ].apply (lambda x : 1 if x == 'Yes' else 0 )
299
+ df ['Has Online delivery' ] = df ['Has Online delivery' ].apply (lambda x : 1 if x == 'Yes' else 0 )
300
+ st .write ('Data after preprocessing:\n ' , df .head ())
301
+
302
+
303
+ # Select features and target variable
304
+ features = ['Country Code' , 'City' , 'Cuisines' , 'Price range' , 'Has Table booking' , 'Has Online delivery' ]
305
+ X = df [features ]
306
+ y = df ['Aggregate rating' ]
307
+
308
+ st .markdown ("- **Split the data**: Split the dataset into training and testing sets." )
309
+
310
+ # Splitting the data into Train & Test datasets
311
+ X_train , X_test , y_train , y_test = train_test_split (X , y , test_size = 0.2 , random_state = 42 )
312
+
313
+ st .write ('---' )
314
+
315
+
316
+ # Standardize the features
317
+ scaler = StandardScaler ()
318
+ X_train = scaler .fit_transform (X_train )
319
+ X_test = scaler .transform (X_test )
320
+
321
+ st .markdown ("- **Build and evaluate models**: Train and evaluate different regression models." )
322
+ # Train and evaluate different regression models
323
+ models = {
324
+ 'Linear Regression' : LinearRegression (),
325
+ 'Decision Tree' : DecisionTreeRegressor (random_state = 42 ),
326
+ 'Random Forest' : RandomForestRegressor (random_state = 42 )
327
+ }
328
+ st .write ('---' )
329
+ # Displaying the models
330
+ st .write ("The models used are:-\n " ,models )
331
+
332
+ results = {}
333
+ for name , model in models .items ():
334
+ model .fit (X_train , y_train )
335
+ y_pred = model .predict (X_test )
336
+ mse = mean_squared_error (y_test , y_pred )
337
+ r2 = r2_score (y_test , y_pred )
338
+ results [name ] = {'MSE' : mse , 'R2' : r2 }
339
+
340
+ st .write ('---' )
341
+ # Display results
342
+ results_df = pd .DataFrame (results ).T
343
+ st .write ("After prediction the scores are:-\n " ,results_df )
344
+ st .write ('---' )
345
+
346
+ st .markdown ("- **Split the data**: Split the dataset into training and testing sets." )
347
+ st .write ('---' )
348
+
278
349
279
350
if __name__ == '__main__' :
280
351
main ()
0 commit comments