@@ -235,8 +235,8 @@ def main():
235
235
st .write ('---' )
236
236
237
237
st .markdown ("- Extract additional features from the existing columns, such as the length of the restaurant name or address." )
238
- df ['Restaurant Name Length' ] = df2 ['Restaurant Name' ].apply (len )
239
- df ['Address Length' ] = df2 ['Address' ].apply (len )
238
+ df2 ['Restaurant Name Length' ] = df2 ['Restaurant Name' ].apply (len )
239
+ df2 ['Address Length' ] = df2 ['Address' ].apply (len )
240
240
st .write ('Extracted Features:\n ' , df2 [['Restaurant Name Length' , 'Address Length' ]])
241
241
242
242
st .write ('---' )
@@ -341,6 +341,66 @@ def main():
341
341
st .write ("After predication the scores are:-\n " ,results_df )
342
342
st .write ('---' )
343
343
344
+ st .markdown ("- Visualize the performance of different regression models using a bar chart." )
345
+ results_df .plot (kind = 'bar' , figsize = (10 , 6 ))
346
+ plt .title ('Model Performance Comparison' )
347
+ plt .xlabel ('Model' )
348
+ plt .ylabel ('Score' )
349
+ st .pyplot (plt )
350
+ st .write ('---' )
351
+
352
+ if selected_task == 'Task 2' and selected_level == 'Level 3' :
353
+ df = pd .read_csv ("./data/data.csv" )
354
+
355
+ le = LabelEncoder ()
356
+ df ['Cuisines' ] = le .fit_transform (df ['Cuisines' ])
357
+ df ['City' ] = le .fit_transform (df ['City' ])
358
+ df ['Country Code' ] = le .fit_transform (df ['Country Code' ])
359
+ df ['Rating color' ] = le .fit_transform (df ['Rating color' ])
360
+ df ['Has Table booking' ] = df ['Has Table booking' ].apply (lambda x : 1 if x == 'Yes' else 0 )
361
+ df ['Has Online delivery' ] = df ['Has Online delivery' ].apply (lambda x : 1 if x == 'Yes' else 0 )
362
+
363
+ features = ['Country Code' , 'City' , 'Cuisines' , 'Price range' , 'Has Table booking' , 'Has Online delivery' ]
364
+ X = df [features ]
365
+ y = df ['Aggregate rating' ]
366
+
367
+ X_train , X_test , y_train , y_test = train_test_split (X , y , test_size = 0.2 , random_state = 42 )
368
+
369
+ scaler = StandardScaler ()
370
+ X_train = scaler .fit_transform (X_train )
371
+ X_test = scaler .transform (X_test )
372
+
373
+ models = {
374
+ 'Linear Regression' : LinearRegression (),
375
+ 'Decision Tree' : DecisionTreeRegressor (random_state = 42 ),
376
+ 'Random Forest' : RandomForestRegressor (random_state = 42 )
377
+ }
378
+
379
+ results = {}
380
+ for name , model in models .items ():
381
+ model .fit (X_train , y_train )
382
+ y_pred = model .predict (X_test )
383
+ mse = mean_squared_error (y_test , y_pred )
384
+ r2 = r2_score (y_test , y_pred )
385
+ results [name ] = {'MSE' : mse , 'R2' : r2 }
386
+
387
+ results_df = pd .DataFrame (results ).T
388
+
389
+ st .markdown ("### Task 2: Customer Prefernece Analysis" )
390
+
391
+ st .markdown ("- Analyse the relationship between the type of cuisine and the restaurant's rating" )
392
+ cuisine_ratings = df .groupby ('Cuisines' )['Aggregate rating' ].mean ().sort_values (ascending = False )
393
+ st .write ('Average rating by cuisine:\n ' , cuisine_ratings )
394
+
395
+ st .markdown ("- Identify the most popular cuisines based on the number of votes" )
396
+ cuisine_votes = df .groupby ('Cuisines' )['Votes' ].sum ().sort_values (ascending = False )
397
+ st .write ('Most popular cuisines based on votes:\n ' , cuisine_votes )
398
+
399
+ st .markdown ("- Determine if there are any specific cuisines that tend to receive higher ratings" )
400
+ top_cuisines = cuisine_ratings .head (10 )
401
+ st .write ('Top 10 cuisines with highest ratings:\n ' , top_cuisines )
402
+
403
+
344
404
345
405
346
406
if __name__ == '__main__' :
0 commit comments