@@ -204,36 +204,54 @@ async def serve(self, sockets=None):
204
204
205
205
# Initialize lifespan attribute before startup
206
206
# Handle different versions of uvicorn
207
+ self .lifespan = None
208
+
207
209
try :
208
210
# Try the newer location first (uvicorn >= 0.18.0)
209
211
from uvicorn .lifespan .on import LifespanOn
210
212
self .lifespan = LifespanOn (
211
- self .config .loaded_app ,
212
- self .config .lifespan_on if hasattr (self .config , "lifespan_on" ) else self . config . lifespan ,
213
- self . config . logger
213
+ self .config .app ,
214
+ self .config .lifespan_on if hasattr (self .config , "lifespan_on" ) else "auto" ,
215
+ logger = logger
214
216
)
215
- except (ImportError , AttributeError ):
217
+ logger .info ("Using LifespanOn from uvicorn.lifespan.on" )
218
+ except (ImportError , AttributeError ) as e :
219
+ logger .debug (f"Failed to import LifespanOn: { str (e )} " )
216
220
try :
217
221
# Try the older location (uvicorn < 0.18.0)
218
222
from uvicorn .lifespan .lifespan import Lifespan
219
223
self .lifespan = Lifespan (
220
- self .config .loaded_app ,
221
- self . config . lifespan ,
222
- self . config . logger
224
+ self .config .app ,
225
+ "auto" ,
226
+ logger = logger
223
227
)
224
- except (ImportError , AttributeError ):
228
+ logger .info ("Using Lifespan from uvicorn.lifespan.lifespan" )
229
+ except (ImportError , AttributeError ) as e :
230
+ logger .debug (f"Failed to import Lifespan from lifespan.lifespan: { str (e )} " )
225
231
try :
226
232
# Try the oldest location
227
233
from uvicorn .lifespan import Lifespan
228
234
self .lifespan = Lifespan (
229
- self .config .loaded_app ,
230
- self . config . lifespan ,
231
- self . config . logger
235
+ self .config .app ,
236
+ "auto" ,
237
+ logger = logger
232
238
)
233
- except (ImportError , AttributeError ):
234
- # Fallback to no lifespan
235
- self .lifespan = None
236
- self .config .logger .warning ("Could not initialize lifespan - server may not handle startup/shutdown events properly" )
239
+ logger .info ("Using Lifespan from uvicorn.lifespan" )
240
+ except (ImportError , AttributeError ) as e :
241
+ logger .debug (f"Failed to import Lifespan from uvicorn.lifespan: { str (e )} " )
242
+ try :
243
+ # Try the newest location (uvicorn >= 0.21.0)
244
+ from uvicorn .lifespan .state import LifespanState
245
+ self .lifespan = LifespanState (
246
+ self .config .app ,
247
+ logger = logger
248
+ )
249
+ logger .info ("Using LifespanState from uvicorn.lifespan.state" )
250
+ except (ImportError , AttributeError ) as e :
251
+ logger .debug (f"Failed to import LifespanState: { str (e )} " )
252
+ # Fallback to no lifespan
253
+ self .lifespan = None
254
+ logger .warning ("Could not initialize lifespan - server may not handle startup/shutdown events properly" )
237
255
238
256
await self .startup (sockets = sockets )
239
257
@@ -342,61 +360,22 @@ def on_startup():
342
360
if startup_complete :
343
361
return
344
362
345
- # Update server status to running
363
+ # Set server status to running
346
364
set_server_status ("running" )
347
- print_running_banner (port , public_url )
348
365
349
- # Print current system instructions
350
- instructions_text = system_instructions .get_instructions ()
351
- print_system_instructions (instructions_text )
366
+ # Display the RUNNING banner
367
+ print_running_banner (__version__ )
352
368
353
- # Import here to avoid circular imports
354
- from .core .app import model_manager
355
- from .cli .config import get_config_value
369
+ # Display system resources
370
+ print_system_resources ()
356
371
357
- # Print model info if a model is loaded
358
- if model_manager .current_model :
359
- model_info = model_manager .get_model_info ()
360
- print_model_info (model_info )
361
- else :
362
- # Get model settings from config system
363
- model_id = os .environ .get ("HUGGINGFACE_MODEL" , DEFAULT_MODEL )
364
-
365
- # Get optimization settings from config system
366
- enable_quantization = get_config_value ('enable_quantization' , ENABLE_QUANTIZATION )
367
- if isinstance (enable_quantization , str ):
368
- enable_quantization = enable_quantization .lower () not in ('false' , '0' , 'none' , '' )
369
-
370
- quantization_type = get_config_value ('quantization_type' , QUANTIZATION_TYPE )
371
-
372
- enable_attention_slicing = get_config_value ('enable_attention_slicing' , ENABLE_ATTENTION_SLICING )
373
- if isinstance (enable_attention_slicing , str ):
374
- enable_attention_slicing = enable_attention_slicing .lower () not in ('false' , '0' , 'none' , '' )
375
-
376
- enable_flash_attention = get_config_value ('enable_flash_attention' , ENABLE_FLASH_ATTENTION )
377
- if isinstance (enable_flash_attention , str ):
378
- enable_flash_attention = enable_flash_attention .lower () not in ('false' , '0' , 'none' , '' )
379
-
380
- enable_better_transformer = get_config_value ('enable_better_transformer' , ENABLE_BETTERTRANSFORMER )
381
- if isinstance (enable_better_transformer , str ):
382
- enable_better_transformer = enable_better_transformer .lower () not in ('false' , '0' , 'none' , '' )
383
-
384
- # Print model settings
385
- env_model_info = {
386
- "model_id" : model_id ,
387
- "model_name" : model_id .split ("/" )[- 1 ],
388
- "parameters" : "Unknown (not loaded yet)" ,
389
- "device" : "cpu" if not torch .cuda .is_available () else f"cuda:{ torch .cuda .current_device ()} " ,
390
- "quantization" : quantization_type if enable_quantization else "None" ,
391
- "optimizations" : {
392
- "attention_slicing" : enable_attention_slicing ,
393
- "flash_attention" : enable_flash_attention ,
394
- "better_transformer" : enable_better_transformer
395
- }
396
- }
397
- print_model_info (env_model_info )
372
+ # Display model information
373
+ print_model_info ()
398
374
399
- # Print API documentation
375
+ # Display system instructions
376
+ print_system_instructions ()
377
+
378
+ # Display API documentation
400
379
print_api_docs ()
401
380
402
381
# Set flag to indicate startup is complete
@@ -435,14 +414,13 @@ async def on_startup_async():
435
414
server .on_startup_callback = on_startup # Set the callback
436
415
437
416
# Use the appropriate event loop method based on Python version
438
- if sys .version_info >= (3 , 10 ):
439
- # Python 3.10+ - use get_event_loop
417
+ try :
418
+ asyncio .run (server .serve ())
419
+ except RuntimeError as e :
420
+ # Handle "Event loop is already running" error
421
+ if "Event loop is already running" in str (e ):
422
+ logger .warning ("Event loop is already running. Using get_event_loop instead." )
440
423
asyncio .get_event_loop ().run_until_complete (server .serve ())
441
- else :
442
- # Python 3.9 and below - use new_event_loop
443
- loop = asyncio .new_event_loop ()
444
- asyncio .set_event_loop (loop )
445
- loop .run_until_complete (server .serve ())
446
424
else :
447
425
# Local environment
448
426
logger .info (f"Starting server on port { port } (local mode)" )
@@ -469,21 +447,12 @@ async def on_startup_async():
469
447
logger .warning ("Event loop is already running. Using get_event_loop instead." )
470
448
asyncio .get_event_loop ().run_until_complete (server .serve ())
471
449
else :
450
+ # Re-raise other errors
472
451
raise
473
452
except Exception as e :
474
- # Update server status on error
475
- set_server_status ("error" )
476
-
477
- # Clean up ngrok if server fails to start
478
- if use_ngrok and public_url :
479
- try :
480
- from pyngrok import ngrok
481
- ngrok .disconnect (public_url )
482
- except Exception as ngrok_e :
483
- logger .error (f"Failed to disconnect ngrok: { str (ngrok_e )} " )
484
-
485
453
logger .error (f"Server startup failed: { str (e )} " )
486
454
logger .error (traceback .format_exc ())
455
+ set_server_status ("error" )
487
456
raise
488
457
489
458
def cli ():
0 commit comments