diff --git a/aios/config/config.yaml.example b/aios/config/config.yaml.example
index aec72647..612f77c5 100644
--- a/aios/config/config.yaml.example
+++ b/aios/config/config.yaml.example
@@ -2,53 +2,60 @@
 # API Keys Configuration
 api_keys:
-  openai: "" # OpenAI API key
-  gemini: "" # Google Gemini API key
-  groq: "" # Groq API key
+  openai: ""    # OpenAI API key
+  gemini: ""    # Google Gemini API key
+  groq: ""      # Groq API key
   anthropic: "" # Anthropic API key
   huggingface:
-    auth_token: "" # HuggingFace auth token
-    home: "" # Optional: HuggingFace models path
+    auth_token: "" # Your HuggingFace auth token for authorized models
+    cache_dir: ""  # Your cache directory for saving HuggingFace models

 # LLM Configuration
 llms:
   models:
-    # - name: "gpt-4o-mini"
-    #   backend: "openai"
-    #   max_new_tokens: 1024
-    #   temperature: 1.0
-
-    - name: "gemini-1.5-flash"
-      backend: "google"
-      max_new_tokens: 1024
-      temperature: 1.0
+    # OpenAI Models
     # - name: "gpt-4o-mini"
     #   backend: "openai"
-    #   max_new_tokens: 1024
-    #   temperature: 1.0
-    # - name: "qwen2.5:7b"
-    #   backend: "ollama"
-    #   max_new_tokens: 1024
-    #   temperature: 1.0
-    #   hostname: "http://localhost:11434" # Make sure to run ollama server
-    #
-    # - name: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+
+    # Google Models
+    # - name: "gemini-1.5-flash"
+    #   backend: "google"
+
+
+    # Anthropic Models
+    # - name: "claude-3-opus"
+    #   backend: "anthropic"
+
+    # Ollama Models
+    - name: "qwen2.5:7b"
+      backend: "ollama"
+      hostname: "http://localhost:11434" # Make sure to run the ollama server
+
+    # HuggingFace Models
+    # - name: "meta-llama/Llama-3.1-8B-Instruct"
     #   backend: "huggingface"
-    #   max_new_tokens: 1024
-    #   temperature: 1.0
-    #   max_gpu_memory:
-
+    #   max_gpu_memory: {0: "48GB"} # GPU memory allocation
+    #   eval_device: "cuda:0"       # Device for model evaluation
+
+    # vLLM Models
+    # To use vllm as the backend, install vllm and run the vllm server: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
+    # An example command to run the vllm server is:
+    #   vllm serve meta-llama/Llama-3.2-3B-Instruct --port 8091
+    # - name: "meta-llama/Llama-3.1-8B-Instruct"
+    #   backend: "vllm"
+    #   hostname: "http://localhost:8091"
+
+
+
   log_mode: "console"
-  # use_context_manager: false
-  use_context_manager: false # set as true to enable context interrupt and switch
+  use_context_manager: false

 memory:
-  memory_limit: 524288 # 512KB
-  eviction_k: 3
-
+  log_mode: "console"
+
 storage:
   root_dir: "root"
   use_vector_db: true
@@ -62,4 +69,4 @@ agent_factory:

 server:
   host: "localhost"
-  port: 8000
+  port: 8000
\ No newline at end of file
diff --git a/aios/config/config_manager.py b/aios/config/config_manager.py
index f4fdbc28..0c3d2765 100644
--- a/aios/config/config_manager.py
+++ b/aios/config/config_manager.py
@@ -227,6 +227,20 @@ def get_agent_factory_config(self) -> dict:
         """
         return self.config.get("agent_factory", {})
 
+    def get_server_config(self) -> dict:
+        """
+        Retrieves the server configuration settings.
+
+        Returns:
+            dict: Dictionary containing server configurations
+
+        Example:
+            server_config = config_manager.get_server_config()
+            host = server_config.get("host")
+            port = server_config.get("port")
+        """
+        return self.config.get("server", {})
+
     # def get_kernel_config(self) -> dict:
     #     """Get kernel configuration"""
     #     return self.config.get("kernel", {})
diff --git a/aios/llm_core/adapter.py b/aios/llm_core/adapter.py
index ad7e4b2b..5303c6d5 100644
--- a/aios/llm_core/adapter.py
+++ b/aios/llm_core/adapter.py
@@ -1,5 +1,5 @@
 from aios.context.simple_context import SimpleContextManager
-from aios.llm_core.strategy import RouterStrategy, SequentialRouting, SmartRouting
+from aios.llm_core.routing import RouterStrategy, SequentialRouting, SmartRouting
 from aios.llm_core.local import HfLocalBackend
 from aios.utils.id_generator import generator_tool_call_id
 from cerebrum.llm.apis import LLMQuery, LLMResponse
@@ -85,7 +85,7 @@ def __init__(
         api_key: Optional[Union[str, List[str]]] = None,
         log_mode: str = "console",
         use_context_manager: bool = False,
-        strategy: Optional[RouterStrategy] = RouterStrategy.Sequential,
+        routing_strategy: Optional[RouterStrategy] = RouterStrategy.Sequential,
     ):
         """
         Initialize the LLMAdapter.
@@ -95,7 +95,7 @@ def __init__(
             api_key: API key(s) for the LLM services
             log_mode: Mode of logging the LLM processing status
             use_context_manager: Whether to use context management
-            strategy: Strategy for routing requests
+            routing_strategy: Strategy for routing requests
         """
         self.log_mode = log_mode
         self.use_context_manager = use_context_manager
@@ -106,8 +106,10 @@ def __init__(
         self._setup_api_keys()
         self._initialize_llms()
 
-        if strategy == RouterStrategy.Sequential:
+        if routing_strategy == RouterStrategy.Sequential:
             self.strategy = SequentialRouting(self.llm_configs)
+        elif routing_strategy == RouterStrategy.Smart:
+            self.strategy = SmartRouting(self.llm_configs)
 
     def _setup_api_keys(self) -> None:
         """
diff --git a/aios/llm_core/local.py b/aios/llm_core/local.py
index 49cfd9c9..0f2eefd2 100644
--- a/aios/llm_core/local.py
+++ b/aios/llm_core/local.py
@@ -105,7 +105,8 @@ def generate(
         response = self.model.generate(
             **inputs,
             temperature=temperature,
-            max_length=max_tokens,
+            # max_length=max_tokens,
+            max_new_tokens=max_tokens,
             top_k=10,
             num_beams=4,
             early_stopping=True,
diff --git a/aios/llm_core/strategy.py b/aios/llm_core/routing.py
similarity index 99%
rename from aios/llm_core/strategy.py
rename to aios/llm_core/routing.py
index 99a21e34..d6296cf8 100644
--- a/aios/llm_core/strategy.py
+++ b/aios/llm_core/routing.py
@@ -39,6 +39,7 @@ class RouterStrategy(Enum):
 
     Sequential = 0,
+    Smart = 1
 
 
 class SequentialRouting:
     """
diff --git a/runtime/launch.py b/runtime/launch.py
index cb6f0cae..a6ab59f8 100644
--- a/runtime/launch.py
+++ b/runtime/launch.py
@@ -32,6 +32,8 @@
 
 from fastapi.middleware.cors import CORSMiddleware
 
+import uvicorn
+
 # from cerebrum.llm.layer import LLMLayer as LLMConfig
 # from cerebrum.memory.layer import MemoryLayer as MemoryConfig
 # from cerebrum.storage.layer import StorageLayer as StorageConfig
@@ -631,4 +633,14 @@ async def update_config(request: Request):
         raise HTTPException(
             status_code=500, detail=f"Failed to update configuration: {str(e)}"
-        )
\ No newline at end of file
+        )
+
+# Add a main function to run the app directly
+if __name__ == "__main__":
+    # Get server config from config.yaml
+    server_config = config.get_server_config()
+    host = server_config.get("host", "localhost")
+    port = server_config.get("port", 8000)
+
+    # print(f"Starting AIOS server on {host}:{port}")
+    uvicorn.run("runtime.launch:app", host=host, port=port, reload=False)
\ No newline at end of file
diff --git a/runtime/launch_kernel.sh b/runtime/launch_kernel.sh
index 31a720c8..65cdeef0 100644
--- a/runtime/launch_kernel.sh
+++ b/runtime/launch_kernel.sh
@@ -1 +1 @@
-python -m uvicorn runtime.launch:app --port 8000
+python -m runtime.launch
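
Taken together, these changes move the kernel's listen address out of the hard-coded uvicorn command line and into the "server" section of config.yaml, read through the new ConfigManager.get_server_config() helper. A minimal usage sketch of how the kernel would be started after this patch, assuming the repository root is the working directory and a config.yaml has been created from config.yaml.example (the commands and the localhost:8000 fallback come from this diff; nothing else is implied):

    # Start the AIOS kernel via the updated launcher script; host and port are
    # taken from the "server" section of config.yaml, falling back to
    # localhost:8000 if the section is missing.
    bash runtime/launch_kernel.sh

    # Equivalent direct invocation of the new module entry point:
    python -m runtime.launch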